In [1]:
import os, sys
import plotly.express as px
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json

import darts
from darts.dataprocessing.transformers.boxcox import BoxCox
from darts.models import LightGBMModel, XGBModel, LinearRegressionModel, NBEATSModel, BlockRNNModel, RandomForest
from darts.metrics import smape, mape, mase, mse, rmse, r2_score, mae
from darts.dataprocessing.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, RobustScaler   
from darts.dataprocessing.transformers.scaler import Scaler
from darts.utils.missing_values import extract_subseries



from pytorch_lightning.callbacks import ModelCheckpoint
import torch
from wandb.xgboost import WandbCallback


from utils import *
from train_eval import *
import wandb
wandb.login()

import warnings
warnings.filterwarnings('ignore')

# Set seed
# NOTE: this seeds NumPy's global RNG only; no other library is seeded in this cell.
np.random.seed(42)

# Set working directory
# The notebook is expected to start one level below the git repo root. Only step up when
# we are not already in the root, so that re-running this cell in a live kernel does not
# walk further up the directory tree and trip the assertion below.
REPO_ROOT_NAME = "WattCast"
if not os.getcwd().endswith(REPO_ROOT_NAME):
    os.chdir(r"..") # should be the git repo root directory, checking below:
print("Current working directory: " + os.getcwd())
assert os.getcwd().endswith(REPO_ROOT_NAME), (
    "Expected the working directory to be the '" + REPO_ROOT_NAME
    + "' repo root, but got: " + os.getcwd()
)
# Locations used by the rest of the notebook, both resolved against the repo root.
dir_path = os.path.join(os.getcwd(), 'data', 'clean_data')
model_dir = os.path.join(os.getcwd(), 'models')



Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mnikolaushouben[0m ([33mwattcast[0m). Use [1m`wandb login --relogin`[0m to force relogin


Current working directory: c:\Users\nik\Desktop\Berkeley_Projects\WattCast


In [2]:
def train_eval_light():
    """Execute a single W&B run: load data, fit one model, score it, log the results.

    The function takes no arguments because it is passed to ``wandb.agent`` as the
    sweep function. It reads the module-level ``config_run`` dict, which therefore has
    to exist before this is called, and merges it into ``wandb.config``.

    Side effects: starts a W&B run in project ``WattCast_tuning``, logs ``eval_loss``
    and a plotly figure under ``predictions``, then finishes the run. Returns ``None``.
    """
    wandb.init(project="WattCast_tuning")
    wandb.config.update(config_run)
    config = wandb.config

    print("Getting data...")
    # data_pipeline hands back a 10-tuple; only some of the entries are used below.
    (
        pipeline,
        ts_train_piped, ts_val_piped, ts_test_piped,
        ts_train_weather_piped, ts_val_weather_piped, ts_test_weather_piped,
        trg_train_inversed, trg_val_inversed, trg_test_inversed,
    ) = data_pipeline(config)

    print("Getting model instance...")
    candidate = get_model(config)
    # train_models takes a list of models and its first return value is indexed below,
    # so the single candidate is wrapped in a list.
    trained_models, runtime = train_models(
        [candidate],
        ts_train_piped,
        ts_train_weather_piped,
        ts_val_piped,
        ts_val_weather_piped,
    )

    print("Evaluating model...")
    fitted_model = trained_models[0]
    # Evaluate on the test series selected by config.longest_ts_test_idx.
    test_target = ts_test_piped[config.longest_ts_test_idx]
    test_weather = ts_test_weather_piped[config.longest_ts_test_idx]
    predictions, score = predict_testset(
        fitted_model, test_target, test_weather,
        config.n_lags, config.n_ahead, config.eval_stride, pipeline,
    )

    print("Plotting predictions...")
    actual_df = trg_test_inversed.pd_dataframe()
    # Align actuals and predictions column-wise and drop rows where either is missing.
    df_compare = pd.concat([actual_df, predictions], axis=1).dropna()
    df_compare.columns = ['target', 'prediction']
    fig = px.line(df_compare, title='Predictions vs. Test Set')

    wandb.log({'eval_loss': score})
    wandb.log({'predictions': fig})
    wandb.finish()



In [3]:
# See what keys are in the HDF5 (.h5) data files under `dir_path`.
# Judging by this cell's output, the result is a pair of dicts keyed by file name:
# the first lists location-like keys (e.g. 'germany'), the second lists
# resolution-like keys (e.g. '15min', '60min') -- TODO confirm against get_hdf_keys.
get_hdf_keys(dir_path)

({'1_county.h5': ['Los_Angeles', 'New_York', 'Sacramento'],
  '2_town.h5': ['town_0', 'town_1', 'town_2'],
  '3_village.h5': ['village_0', 'village_1', 'village_2'],
  '4_neighborhood.h5': ['germany'],
  '5_household.h5': ['household_0', 'household_1', 'household_2'],
  '6_apartment.h5': ['apartment_0', 'apartment_1', 'apartment_2']},
 {'1_county.h5': ['60min'],
  '2_town.h5': ['15min', '60min'],
  '3_village.h5': ['15min', '60min'],
  '4_neighborhood.h5': ['15min', '60min'],
  '5_household.h5': ['15min', '60min'],
  '6_apartment.h5': ['15min', '5min', '60min']})

In [3]:
# run parameters

# Number of runs each sweep agent executes (passed to wandb.agent as `count`).
sweeps = 20

# (spatial scale, location) combinations to tune on; un-comment entries to include them.
scale_location_pairs = (
    ('4_neighborhood', 'germany'),
    # ('5_household', 'household_0'),
)

# Model identifiers to sweep over; each needs a matching
# sweep_configurations/config_sweep_<model>.json file.
models = [
    # 'rf',
    # 'xgb',
    # 'gru',
    # 'lgbm',
    # 'nbeats',
    'transformer',
]

for scale, location in scale_location_pairs:
    for model in models:

        # Fixed (non-swept) settings for this combination. train_eval_light reads this
        # module-level dict, so it must be assigned before the agent is started.
        # The 'liklihood' spelling is kept as-is; presumably it is consumed under this
        # exact key elsewhere -- verify before renaming.
        config_run = dict(
            spatial_scale=scale,
            temp_resolution=60,
            location=location,
            model=model,
            horizon_in_hours=24,
            lookback_in_hours=24,
            boxcox=True,
            liklihood=None,
            weather=True,
            holiday=True,
            datetime_encodings=False,
        )

        # Load the model-specific sweep definition (path is relative to the working directory).
        sweep_config_path = f'sweep_configurations/config_sweep_{model}.json'
        with open(sweep_config_path, 'r') as fp:
            sweep_config = json.load(fp)

        # Name the sweep after the model, spatial scale, location and temporal resolution.
        sweep_config['name'] = (
            f"{model}sweep{config_run['spatial_scale']}"
            f"_{config_run['location']}_{config_run['temp_resolution']}"
        )

        # Register the sweep, then run `sweeps` trials of train_eval_light against it.
        sweep_id = wandb.sweep(sweep_config, project="WattCast_tuning")
        wandb.agent(sweep_id, train_eval_light, count=sweeps)


Create sweep with ID: vqc6k5o0
Sweep URL: https://wandb.ai/wattcast/Wattcast_tuning/sweeps/vqc6k5o0


[34m[1mwandb[0m: Agent Starting Run: vmzpa3ge with config:
[34m[1mwandb[0m: 	datetime_encodings: 1
[34m[1mwandb[0m: 	learning_rate: 0.05
[34m[1mwandb[0m: 	max_depth: 12
[34m[1mwandb[0m: 	min_child_weight: 10
[34m[1mwandb[0m: 	n_estimators: 1000
[34m[1mwandb[0m: 	objective: reg:squarederror
[34m[1mwandb[0m: 	reg_lambda: 0.5
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…



Getting data...


Specified past encoders in `add_encoders` at model creation but model does not accept past covariates. past encoders will be ignored.


Getting model instance...
Training LightGBMModel
[1]	valid_0's l2: 0.0378291
[2]	valid_0's l2: 0.0332792
[3]	valid_0's l2: 0.0290724
[4]	valid_0's l2: 0.0261448
[5]	valid_0's l2: 0.0233986
[6]	valid_0's l2: 0.021353
[7]	valid_0's l2: 0.0193728
[8]	valid_0's l2: 0.0175784
[9]	valid_0's l2: 0.0165509
[10]	valid_0's l2: 0.0153847
[11]	valid_0's l2: 0.0144719
[12]	valid_0's l2: 0.0137234
[13]	valid_0's l2: 0.0129757
[14]	valid_0's l2: 0.0125227
[15]	valid_0's l2: 0.0125926
[16]	valid_0's l2: 0.0121905
[17]	valid_0's l2: 0.0122701
[18]	valid_0's l2: 0.0118627
[19]	valid_0's l2: 0.0119068
[20]	valid_0's l2: 0.0115671
[21]	valid_0's l2: 0.0116625
[22]	valid_0's l2: 0.0114916
[23]	valid_0's l2: 0.0115635
[24]	valid_0's l2: 0.0113387
[25]	valid_0's l2: 0.0113631
[26]	valid_0's l2: 0.011194
[27]	valid_0's l2: 0.0112365
[28]	valid_0's l2: 0.0110893
[29]	valid_0's l2: 0.0111272
[30]	valid_0's l2: 0.0110452
[31]	valid_0's l2: 0.0110151
[32]	valid_0's l2: 0.0109618
[33]	valid_0's l2: 0.0109156
[34]	

0,1
eval_loss,▁

0,1
eval_loss,0.08297


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.
