In [1]:
import json
from pathlib import Path
import shutil
import datetime as dt
from pprint import pprint

import pandas as pd

from wattile.entry_point import init_logging
from wattile.data_reading import read_dataset_from_file
from wattile.data_processing import prep_for_rnn
from wattile.models import ModelFactory

PROJECT_DIRECTORY = /Users/jsmith2/Code/@ic/Wattile/wattile


In [2]:
# Root folder of this example: the parent of the directory the notebook is run from.
PROJECT_DIRECTORY = Path().resolve().parent
print(PROJECT_DIRECTORY)

# Names of the models to load; each one is looked up as
# data/configs/<name>.config.json by the config-loading cell.
MODEL_NAMES = ["model-1"]

# Filled in by the config-loading cell, one entry per name in MODEL_NAMES.
MODEL_CONFIGS, MODEL_TARGET_DIRS = [], []

/Users/jsmith2/Code/@ic/Wattile_examples/ex-1-skyspark-demo


In [3]:
"""
Loads the configuration files for the models we wish to train
"""

for model in MODEL_NAMES:
    with open(PROJECT_DIRECTORY / "data" / "configs" / f"{model}.config.json", "r") as f:
        tmp_conf = json.load(f)

    exp_dir = PROJECT_DIRECTORY /  "models" / model

    tmp_conf["data_output"]["exp_dir"] = str(exp_dir)
    tmp_conf["data_input"]["data_dir"] = str(PROJECT_DIRECTORY / "data" / "Headquarters")
    # Be sure the model's use_case is set to 'prediction'.
    tmp_conf["learning_algorithm"]["use_case"] = "prediction"

    MODEL_CONFIGS.append(tmp_conf)
    MODEL_TARGET_DIRS.append(exp_dir)

## Model 1

In [4]:
"""
Init the model with it's configs. Be sure to switch it's use case to prediction.
"""

init_logging(local_results_dir=MODEL_CONFIGS[0]["data_output"]["exp_dir"])
model = ModelFactory.create_model(MODEL_CONFIGS[0])

model

Logging to: /Users/jsmith2/Code/@ic/Wattile_examples/ex-1-skyspark-demo/models/model-1/output.out, PID: 59726


<wattile.models.alfa_model.AlfaModel at 0x2a8c51e20>

In [5]:
"""
If we want to get predictions for only one row, we have to find the time range needed to predict the nominal time.

we can use `get_input_window_for_output_time` to figure this out.
"""
nominal_time = pd.Timestamp(year=2024, month=7, day=2, tz=dt.timezone.utc)

start_time, end_time = model.get_input_window_for_output_time(nominal_time)
freq = model.configs["data_processing"]["resample"]["bin_interval"]
inclusive = model.configs["data_processing"]["resample"]["bin_label"]

predictors = model.configs["data_input"]["predictor_columns"]
target_var = model.configs["data_input"]["target_var"]

print(f"""
    To use this model to predict {target_var} for {nominal_time}, we will need to feed it data:
        - starting at {start_time}
        - ending at {end_time}
        - a rough frequency of {freq}
        - predictors named {predictors}
""")


    To use this model to predict Headquarters ElecMeter-Main kW for 2024-07-02 00:00:00+00:00, we will need to feed it data:
        - starting at 2024-06-30 00:00:00+00:00
        - ending at 2024-07-02 01:00:00+00:00
        - a rough frequency of 60min
        - predictors named ['Richmond, VA Humidity', 'Richmond, VA Temp']



In [6]:
"""
Great, now lets make some dummy data that matches those specs.
"""

data = pd.DataFrame(index=pd.date_range(start_time, end_time, freq=freq, inclusive=inclusive))
for predictor in predictors:
    data[predictor] = data.index.hour * 100 + data.index.minute

# even though we are predicting, we must fill the target column with dummy data. It's not used though.
data[target_var] = data.index.hour * 100 + data.index.minute


data.head

<bound method NDFrame.head of                            Richmond, VA Humidity  Richmond, VA Temp  \
2024-06-30 00:00:00+00:00                      0                  0   
2024-06-30 01:00:00+00:00                    100                100   
2024-06-30 02:00:00+00:00                    200                200   
2024-06-30 03:00:00+00:00                    300                300   
2024-06-30 04:00:00+00:00                    400                400   
2024-06-30 05:00:00+00:00                    500                500   
2024-06-30 06:00:00+00:00                    600                600   
2024-06-30 07:00:00+00:00                    700                700   
2024-06-30 08:00:00+00:00                    800                800   
2024-06-30 09:00:00+00:00                    900                900   
2024-06-30 10:00:00+00:00                   1000               1000   
2024-06-30 11:00:00+00:00                   1100               1100   
2024-06-30 12:00:00+00:00                   120

In [7]:
"""
Let's prep those for the model.
"""
train_df, val_df = prep_for_rnn(MODEL_CONFIGS[0], data)
val_df

Unnamed: 0,"Richmond, VA Humidity_min_lag8","Richmond, VA Temp_min_lag8",sin_HOD_min_lag8,cos_HOD_min_lag8,DOW_binary_reg_0_min_lag8,DOW_binary_reg_1_min_lag8,DOW_binary_reg_2_min_lag8,DOW_binary_reg_3_min_lag8,DOW_binary_reg_4_min_lag8,DOW_binary_reg_5_min_lag8,...,DOW_binary_reg_0_mean,DOW_binary_reg_1_mean,DOW_binary_reg_2_mean,DOW_binary_reg_3_mean,DOW_binary_reg_4_mean,DOW_binary_reg_5_mean,DOW_binary_reg_6_mean,sin_MOY_mean,cos_MOY_mean,Headquarters ElecMeter-Main kW
2024-07-02 00:00:00+00:00,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,-0.025818,-0.999667,0


In [8]:
"""
One line in, one prediction (with many quantiles) out.
"""
results = model.predict(val_df)

results