In [None]:
import os
import plotly.express as px
import pandas as pd
import numpy as np
import json

from utils import *
from train import *
from eval import *
import wandb
wandb.login()

import warnings
warnings.filterwarnings('ignore')

# Set seed
np.random.seed(42)

# Set working directory
os.chdir(r"..") # should be the git repo root directory, checking below:
print("Current working directory: " + os.getcwd())
assert os.getcwd()[-8:] == "WattCast"
dir_path = os.path.join(os.getcwd(), 'data', 'clean_data')
model_dir = os.path.join(os.getcwd(), 'models')



In [None]:
def train_eval_light():

    wandb.init(project="WattCast_tuning")
    wandb.config.update(config_run)
    config = wandb.config

    print("Getting data...")

    pipeline, ts_train_piped, ts_val_piped, ts_test_piped, ts_train_weather_piped, ts_val_weather_piped, ts_test_weather_piped, trg_train_inversed, trg_val_inversed, trg_test_inversed = data_pipeline(config)

    print("Getting model instance...")
    model_instance = get_model(config)
    model_instance, _ = train_models([model_instance], ts_train_piped, ts_train_weather_piped, ts_val_piped, ts_val_weather_piped)

    print("Evaluating model...")
    predictions, score = predict_testset(model_instance[0], 
                                  ts_test_piped[config.longest_ts_test_idx], 
                                  ts_test_weather_piped[config.longest_ts_test_idx],
                                  config.n_lags, config.n_ahead, config.eval_stride, pipeline,
                                  )


    print("Plotting predictions...")
    df_compare = pd.concat([trg_test_inversed.pd_dataframe(), predictions], axis=1).dropna()
    df_compare.columns = ['target', 'prediction']
    fig = px.line(df_compare, title='Predictions vs. Test Set')

    wandb.log({'eval_loss': score})
    wandb.log({'predictions': fig})
    wandb.finish()



In [27]:

# See what keys are in the h5py data file
locations_dict, resolutions_dict = get_hdf_keys(dir_path)

print("Locations: ", locations_dict)
print("Resolutions: ", resolutions_dict)

for scale, locations in locations_dict.items():
    if scale == '5_building.h5':
        for location in locations:
            df_train = pd.read_hdf(os.path.join(dir_path, scale), key=f'{location}/60min/train_target')
            df_val = pd.read_hdf(os.path.join(dir_path, scale), key=f'{location}/60min/val_target')
            df_test = pd.read_hdf(os.path.join(dir_path, scale), key=f'{location}/60min/test_target')
            fig = px.line(df_train, title=f'{scale}: {location}')
            fig.add_trace(px.line(df_val, title='Validation Set').data[0])
            fig.add_trace(px.line(df_test, title='Test Set').data[0])
            fig.show()



Locations:  {'1_county.h5': ['Los_Angeles', 'New_York', 'Sacramento'], '2_town.h5': ['town_0', 'town_1', 'town_2'], '3_village.h5': ['village_0', 'village_1', 'village_2'], '4_neighborhood.h5': ['neighborhood_0', 'neighborhood_1', 'neighborhood_2'], '5_building.h5': ['building_0', 'building_1', 'building_2']}
Resolutions:  {'1_county.h5': ['60min'], '2_town.h5': ['15min', '60min'], '3_village.h5': ['15min', '60min'], '4_neighborhood.h5': ['60min'], '5_building.h5': ['60min']}


### Running

In [None]:
# run parameters

sweeps = 20

scale_location_pairs = (
    # ('1_county', 'Sacramento'),
    # ('1_county', 'New_York'),
    #('2_town', 'town_0'),
    # ('2_town', 'town_1'),
    # ('2_town', 'town_2'),
    # ('3_village', 'village_1'),
    # ('3_village', 'village_2'),
    #('3_village', 'village_0'),
    # ('4_neighborhood', 'neighborhood_0'),
    # ('4_neighborhood', 'neighborhood_1'),
    # ('4_neighborhood', 'neighborhood_2'),
    # ('5_building', 'building_0'),
    # ('5_building', 'building_1'),
    ('5_building', 'building_2'),
      )



models = [
        'rf',
         'xgb', 
        # 'gru', 
        'lgbm',  
        # 'nbeats',
        #'tft'
        ]


for scale, location in scale_location_pairs:

    for model in models:
        # placeholder initialization of config file (will be updated in train_eval_light())
        config_run = {
            'spatial_scale': scale,
            'temp_resolution': 60,
            'location': location,
            'model': model,
            'horizon_in_hours': 24,
            'lookback_in_hours': 24,
            'boxcox': True,
            'liklihood': None,
            'weather': True,
            'holiday': True,
            'datetime_encodings': False,
        }
        
        with open(f'sweep_configurations/config_sweep_{model}.json', 'r') as fp:
            sweep_config = json.load(fp)                  

        sweep_config['name'] = model + 'sweep' + config_run['spatial_scale'] + '_' + config_run['location'] + '_' + str(config_run['temp_resolution'])

        sweep_id = wandb.sweep(sweep_config, project="WattCast_tuning")
        wandb.agent(sweep_id, train_eval_light, count=sweeps)


### Degbugging

In [18]:
model = 'xgb'
scale = '5_building'
location = 'building_2'


config_run = {
    'spatial_scale': scale,
    'temp_resolution': 60,
    'location': location,
    'model': model,
    'horizon_in_hours': 24,
    'lookback_in_hours': 24,
    'boxcox': True,
    'liklihood': None,
    'weather': True,
    'holiday': True,
    'datetime_encodings': False,
}

with open(f'sweep_configurations/config_sweep_{model}.json', 'r') as fp:
    sweep_config = json.load(fp)   


config = Config().from_dict(config_run)


In [19]:
pipeline, ts_train_piped, ts_val_piped, ts_test_piped, ts_train_weather_piped, ts_val_weather_piped, ts_test_weather_piped, trg_train_inversed, trg_val_inversed, trg_test_inversed = data_pipeline(config)

In [29]:
model_instance = get_model(config)

In [30]:
type(model_instance)

darts.models.forecasting.xgboost.XGBModel

In [31]:
model_instance, _ = train_models([model_instance], ts_train_piped, 
                                    #ts_train_weather_piped, 
                                    ts_val_piped, 
                                    #ts_val_weather_piped
                                    )


ValueError: Unable to build any training samples of the target series at index 0 and the corresponding covariate series; There is no time step for which all required lags are available and are not NaN values.
ValueError: Unable to build any training samples of the target series at index 0 and the corresponding covariate series; There is no time step for which all required lags are available and are not NaN values.


Training XGBModel
Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "c:\Users\nik\Desktop\Berkeley_Projects\WattCast\bin\train.py", line 535, in train_models
  File "c:\Users\nik\miniconda3\envs\gpu2\lib\site-packages\darts\models\forecasting\xgboost.py", line 216, in fit
    super().fit(
  File "c:\Users\nik\miniconda3\envs\gpu2\lib\site-packages\darts\models\forecasting\regression_model.py", line 482, in fit
    self._fit_model(
  File "c:\Users\nik\miniconda3\envs\gpu2\lib\site-packages\darts\models\forecasting\regression_model.py", line 364, in _fit_model
    training_samples, training_labels = self._create_lagged_data(
  File "c:\Users\nik\miniconda3\envs\gpu2\lib\site-packages\darts\models\forecasting\regression_model.py", line 327, in _create_lagged_data
    training_samples, training_labels, _ = _create_lagged_data(
  File "c:\Users\nik\miniconda3\envs\gpu2\lib\site-packages\darts\utils\data\tabularization.py", line 148, in _create_lagged_data
    raise_if(
  File "c:\Users\nik\miniconda3\envs\gpu2\lib\

In [24]:
config.eval_stride

4

In [28]:
predictions, score = predict_testset(model_instance[0], 
                                ts_test_piped[config.longest_ts_test_idx], 
                                ts_test_weather_piped[config.longest_ts_test_idx],
                                config.n_lags, config.n_ahead, config.eval_stride, pipeline,
                                )

Predicting test set...


ValueError: Timestamp must be between 2017-12-01 00:00:00 and 2017-12-31 18:00:00


ValueError: Timestamp must be between 2017-12-01 00:00:00 and 2017-12-31 18:00:00

In [None]:
px.line(predictions)