# Example Notebook

In [26]:
%load_ext autoreload
%autoreload 2

import json
from pathlib import Path
import shutil
import pandas as pd
import logging
import traceback
import mlflow
import pickle

from wattile.entry_point import init_logging
from wattile.data_reading import read_dataset_from_file
from wattile.buildings_processing import prep_for_rnn
from wattile.models import ModelFactory


# Project root, taken as two levels above the current working directory
# (parents[1] of the resolved cwd). This only points at the repository root
# when the kernel is started from a directory two levels below it.
PROJECT_DIRECTORY = Path().resolve().parents[1]

# TODO: move this address to config
#   - will require a deployed tracker instance
#   - OR a cleaner local setup
#   - OR both - discuss w/ team
# mlflow.set_tracking_uri("http://127.0.0.1:5000")
# logging.getLogger("mlflow").setLevel(logging.DEBUG)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [27]:
"""
For this example, we will be using a modified config provided by JK.
Check out the docs for an explanation of each config option.
"""
# Read the JSON config with an explicit UTF-8 encoding instead of relying on
# the platform default encoding.
config_path = PROJECT_DIRECTORY / "tests" / "ftlb" / "ftlb.json"
with open(config_path, "r", encoding="utf-8") as f:
    configs = json.load(f)

# Start from an empty experiment directory: any existing one is removed first,
# so results left over from an earlier run are discarded.
exp_dir = PROJECT_DIRECTORY / "notebooks" / "exp_dir"
if exp_dir.exists():
    shutil.rmtree(exp_dir)
exp_dir.mkdir()

# The config holds paths as strings: point the output at the fresh experiment
# directory and resolve the input data directory against the project root.
configs["data_output"]["exp_dir"] = str(exp_dir)
configs["data_input"]["data_dir"] = str(PROJECT_DIRECTORY / configs["data_input"]["data_dir"])

configs

{'feature_extraction': 'no',
 'scenario_filter': 'no',
 'scenario_wrapper': 'no',
 'metric_relevancy': 'none',
 'metric_wrapper': 'Interval Score',
 'num_cores_wrapper': 1,
 'mi_rand_frac': 1.0,
 'threshold_basis': 'percentile',
 'threshold_relevancy': 0.0,
 'threshold_redundancy': 1.0,
 'target_clean_size_weeks': 1,
 'generate_plot': True,
 'generate_plot_inputdata': True,
 'data_input': {'data_dir': '/Users/jsmith2/Code/IC/Wattile/tests/ftlb',
  'data_config': 'ftlb_config.json',
  'start_time': '2020-07-01T00:00:00-07:00',
  'end_time': '2023-04-01T00:00:00-07:00',
  'predictor_columns': [],
  'target_var': 'FTLB CHW Meter CHW Energy Rate'},
 'data_output': {'exp_dir': '/Users/jsmith2/Code/IC/Wattile/notebooks/exp_dir',
  'plot_comparison': True,
  'plot_comparison_portion_start': 0.0,
  'plot_comparison_portion_end': 1.0},
 'data_processing': {'feat_time': {'month_of_year': ['sincos'],
   'day_of_week': ['sincos'],
   'hour_of_day': ['sincos'],
   'holidays': False},
  'resample': 

In [28]:
"""
First, we read the raw data from the dataset (exported from SS).
Check out the docs for an in-depth explanation of the necessary dataset structure.
"""
# data = read_dataset_from_file(configs)
# data

# Load the predictor CSV, parse the `ts` column as datetimes and use it as the
# index, all in one chain so no frame is mutated in place.
predictor_csv = f'{PROJECT_DIRECTORY}/tests/ftlb/test_predictor_data.csv'
data = (
    pd.read_csv(predictor_csv)
    .assign(ts=lambda frame: pd.to_datetime(frame['ts']))
    .set_index('ts')
)

# with open(f'{PROJECT_DIRECTORY}/tests/ftlb/data/test_predictor_data.pickle', "rb") as f:
#     f.seek(0)
#     data = pickle.load(f)


In [29]:
"""
Next, we call `prep_for_rnn` to perform some data preprocessing.
"""
# The result is unpacked into the training and validation frames, in that order.
train_df, val_df = prep_for_rnn(configs, data)
train_df

INFO:43003:Number of features: 57


Unnamed: 0,FTLB CHW Meter CHWST_min_lag24,FTLB HW Meter HWST_min_lag24,SRRL BMS Dew Point Temperature_min_lag24,SRRL BMS Diffuse Horizontal Irradiance_min_lag24,SRRL BMS Direct Normal Irradiance_min_lag24,SRRL BMS Dry Bulb Temperature_min_lag24,SRRL BMS Global Horizontal Irradiance_min_lag24,SRRL BMS Rainfall_min_lag24,SRRL BMS Relative Humidity_min_lag24,SRRL BMS Snow Depth_min_lag24,...,SRRL BMS Wet Bulb Temperature_mean,SRRL BMS Wind Direction at 19'_mean,SRRL BMS Wind Speed at 19'_mean,sin_HOD_mean,cos_HOD_mean,sin_DOW_mean,cos_DOW_mean,sin_MOY_mean,cos_MOY_mean,FTLB CHW Meter CHW Energy Rate
0,47.208561,170.305649,28.071018,0.0,0.000000,34.309399,0.0,0.0,74.669998,7.066929,...,31.229020,195.366668,6.293580,0.031924,0.999311,-0.781831,0.62349,0.017213,0.999852,-999.0
1,47.250946,168.441498,27.956015,0.0,0.000000,35.058201,0.0,0.0,69.349998,7.047244,...,30.663012,205.860002,5.082169,0.162113,0.986591,-0.781831,0.62349,0.017213,0.999852,-999.0
2,47.266205,167.243805,28.037127,0.0,0.000000,36.514400,0.0,0.0,68.900002,7.047244,...,30.860957,201.006667,4.159629,0.226343,0.973872,-0.781831,0.62349,0.017213,0.999852,-999.0
3,47.228340,167.146454,28.098965,0.0,0.000000,35.952801,0.0,0.0,68.680000,6.996063,...,30.759676,189.966668,4.765111,0.288596,0.957262,-0.781831,0.62349,0.017213,0.999852,-999.0
4,47.270725,165.560120,27.659019,0.0,0.000000,35.846600,0.0,0.0,68.769997,7.051181,...,30.351633,245.726668,4.573326,0.469801,0.882567,-0.781831,0.62349,0.017213,0.999852,-999.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71,47.425583,166.068573,24.345255,0.0,0.313940,29.368401,0.0,0.0,76.559998,6.933071,...,27.399201,52.245201,3.266619,-0.291697,0.956322,-0.781831,0.62349,0.017213,0.999852,-999.0
72,47.378113,165.109680,24.669039,0.0,0.183134,28.740200,0.0,0.0,80.900002,6.937008,...,27.548633,323.411333,2.903031,-0.228496,0.973359,-0.781831,0.62349,0.017213,0.999852,-999.0
73,47.485485,163.610397,24.815577,0.0,0.193597,28.094000,0.0,0.0,83.199997,6.921260,...,27.547280,63.906199,3.242310,-0.164346,0.986219,-0.781831,0.62349,0.017213,0.999852,-999.0
74,47.352116,161.658951,24.845083,0.0,0.308709,27.438801,0.0,0.0,86.300003,6.917323,...,27.295567,45.328667,3.109283,-0.099493,0.994856,-0.781831,0.62349,0.017213,0.999852,-999.0


In [30]:
# Sanity-check both splits: report whether either contains missing values,
# along with each split's shape.
print(f'NaNs in train_df? {train_df.isnull().values.any()}')
print(f'shape of train_df? {train_df.shape}')
# The validation check must inspect val_df itself, not train_df.
print(f'Null in val_df? {val_df.isnull().values.any()}')
print(f'shape of val_df? {val_df.shape}')

NaNs in train_df? False
shape of train_df? (76, 1426)
Null in val_df? False
shape of val_df? (20, 1426)


In [31]:
# Parameters intended for `mlflow.log_params` (that call is currently commented
# out in the training cell). Only the data-processing section is included.
flow_params = {'data_processing': configs['data_processing']}
# Adding the learning-algorithm section as well maxes out MLflow's byte limits
# for params (https://github.com/mlflow/mlflow/issues/3931)
# TODO: speak to team and break up portions into artifacts
# (data_proc is _likely_ an artifact whereas learning_algo contains params)
# flow_params['learning_algorithm'] = configs['learning_algorithm']

In [32]:
"""
Finally, we are ready to train our model!
"""
# MLFlow could be integrated into local logging
init_logging(local_results_dir=configs["data_output"]["exp_dir"])

try:
    # mlflow.start_run()
    # mlflow.log_params(flow_params)
    model = ModelFactory.create_model(configs)
    model.train(train_df, val_df)
except Exception as exception:
    # Catch Exception rather than BaseException: training failures are still
    # logged and the traceback printed, but KeyboardInterrupt / SystemExit
    # propagate, so interrupting the kernel actually stops this cell.
    logging.error(f"Exception Name: {type(exception).__name__}")
    logging.error(exception)
    print(traceback.format_exc())

# mlflow.end_run()

INFO:43003:PID: 43003
INFO:43003:AlfaModel model created.Writing to /Users/jsmith2/Code/IC/Wattile/notebooks/exp_dir.
INFO:43003:Available train batch factors: [1, 2, 4, 19, 38, 76]Requested number of batches per epoch - Train:                 26, val: 1Actual number of batches per epoch - Train:                 19, val: 1Number of data samples in each batch - Train: 4, val: 20
INFO:43003:A new lstm RNN model instantiated
INFO:43003:Number of cores available: 10
INFO:43003:Number of logical processors available: 10
INFO:43003:Initial memory statistics (GB): {'total': 68.719476736, 'available': 8.044752896, 'percent': 88.3, 'used': 8.43167744, 'free': 0.501506048}
INFO:43003:Starting to train the model for 300 epochs!


Logging to: /Users/jsmith2/Code/IC/Wattile/notebooks/exp_dir/output.out, PID: 43003


  mid_train_error_stats = mid_train_error_stats.append(
INFO:43003:Epoch: 27 Iteration: 500. Train_loss: nan. val_loss: nan, LR: 0.001
  mid_train_error_stats = mid_train_error_stats.append(
INFO:43003:Epoch: 53 Iteration: 1000. Train_loss: nan. val_loss: nan, LR: 0.001
  mid_train_error_stats = mid_train_error_stats.append(
INFO:43003:Epoch: 79 Iteration: 1500. Train_loss: nan. val_loss: nan, LR: 0.001
  mid_train_error_stats = mid_train_error_stats.append(
INFO:43003:Epoch: 106 Iteration: 2000. Train_loss: nan. val_loss: nan, LR: 0.001
  mid_train_error_stats = mid_train_error_stats.append(
INFO:43003:Epoch: 132 Iteration: 2500. Train_loss: nan. val_loss: nan, LR: 0.001
  mid_train_error_stats = mid_train_error_stats.append(
INFO:43003:Epoch: 158 Iteration: 3000. Train_loss: nan. val_loss: nan, LR: 0.001
  mid_train_error_stats = mid_train_error_stats.append(
INFO:43003:Epoch: 185 Iteration: 3500. Train_loss: nan. val_loss: nan, LR: 0.001
  mid_train_error_stats = mid_train_error_sta

Traceback (most recent call last):
  File "/var/folders/ff/_m2f295j211gw_m6vpb0m8nnss_h9r/T/ipykernel_43003/2316594135.py", line 11, in <module>
    model.train(train_df, val_df)
  File "/Users/jsmith2/Code/IC/Wattile/wattile/models/AlgoMainRNNBase.py", line 163, in train
    self.run_training(train_loader, val_loader, val_df)
  File "/Users/jsmith2/Code/IC/Wattile/wattile/models/alfa_model.py", line 630, in run_training
    predictions, targets, errors, Q_vals, hist_data = self.test_processing(
  File "/Users/jsmith2/Code/IC/Wattile/wattile/models/alfa_model.py", line 270, in test_processing
    tester = np.histogram(resid[:, i], bins=200)
  File "<__array_function__ internals>", line 200, in histogram
  File "/Users/jsmith2/Code/IC/Wattile/.venv/lib/python3.9/site-packages/numpy/lib/histograms.py", line 780, in histogram
    bin_edges, uniform_bins = _get_bin_edges(a, bins, range, weights)
  File "/Users/jsmith2/Code/IC/Wattile/.venv/lib/python3.9/site-packages/numpy/lib/histograms.p

In [33]:
# Shown as the cell result: tells the reader which directory holds the
# experiment outputs.
f"""
See {exp_dir} for the results.
"""

'\nSee /Users/jsmith2/Code/IC/Wattile/notebooks/exp_dir for the results.\n'