In [1]:
import os
import plotly.express as px
import pandas as pd
import numpy as np
import json

from utils import *
from train import *
from evaluation import *
import wandb
# Log in to Weights & Biases once, before any run or sweep is created further down.
wandb.login()

import warnings
# Blanket filter: no category is given, so every warning is suppressed from here on.
warnings.filterwarnings('ignore')

# Seed NumPy's global random number generator so repeated runs draw the same values.
np.random.seed(42)

# Set working directory to the git repo root.
# The kernel is expected to start one directory below the root, so step up once --
# but only when we are not already at the root. Without this guard, re-running the
# cell would walk out of the repository and trip the assertion below.
repo_name = "WattCast"
if os.path.basename(os.getcwd()) != repo_name:
    os.chdir(r"..")
print("Current working directory: " + os.getcwd())
# Compare the final path component rather than a raw suffix, so a directory whose
# name merely ends in "WattCast" is not mistaken for the repo root.
assert os.path.basename(os.getcwd()) == repo_name, (
    "Expected to be in the '" + repo_name + "' repo root, got: " + os.getcwd()
)
# Absolute paths to <repo root>/data/clean_data and <repo root>/models.
dir_path = os.path.join(os.getcwd(), 'data', 'clean_data')
model_dir = os.path.join(os.getcwd(), 'models')



  from .autonotebook import tqdm as notebook_tqdm
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mnikolaushouben[0m ([33mwattcast[0m). Use [1m`wandb login --relogin`[0m to force relogin


Current working directory: /Users/nikolaushouben/Desktop/WattCast


In [None]:
def train_eval_tuning():
    """Run one tuning trial and log its test score and prediction plot to W&B.

    The function takes no arguments so it can be passed directly to
    ``wandb.agent``. Base settings are read from the notebook-level
    ``config_run`` dict, which must exist before this function is called; it is
    merged into the run's W&B config.

    Steps: build the data via ``data_pipeline``, create and train one model,
    predict on one test series, then log ``eval_loss`` and a plotly figure of
    target vs. prediction.

    The W&B run is used as a context manager so that it is always closed, and
    is not left open when data preparation, training or evaluation raises.
    """
    with wandb.init(project="WattCast_tuning"):
        wandb.config.update(config_run)
        config = wandb.config

        print("Getting data...")

        # Names with a leading underscore are returned by data_pipeline but not used here.
        (
            pipeline,
            ts_train_piped,
            ts_val_piped,
            ts_test_piped,
            ts_train_weather_piped,
            ts_val_weather_piped,
            ts_test_weather_piped,
            _trg_train_inversed,
            _trg_val_inversed,
            trg_test_inversed,
        ) = data_pipeline(config)

        print("Getting model instance...")
        model_instance = get_model(config)
        # train_models takes and returns a list of models; only the models are needed here.
        model_instance, _ = train_models(
            [model_instance],
            ts_train_piped,
            ts_train_weather_piped,
            ts_val_piped,
            ts_val_weather_piped,
        )

        print("Evaluating model...")
        # NOTE(review): longest_ts_test_idx, n_lags, n_ahead and eval_stride are not part of
        # config_run; presumably data_pipeline adds them to the config -- confirm.
        test_idx = config.longest_ts_test_idx
        predictions, score = predict_testset(
            model_instance[0],
            ts_test_piped[test_idx],
            ts_test_weather_piped[test_idx],
            config.n_lags,
            config.n_ahead,
            config.eval_stride,
            pipeline,
        )

        print("Plotting predictions...")
        # Align target and prediction on the index; rows missing either side are dropped.
        df_compare = pd.concat([trg_test_inversed.pd_dataframe(), predictions], axis=1).dropna()
        df_compare.columns = ['target', 'prediction']
        fig = px.line(df_compare, title='Predictions vs. Test Set')

        wandb.log({'eval_loss': score})
        wandb.log({'predictions': fig})



### Running

In [None]:
# Run parameters for the tuning sweeps.

# Passed as `count` to wandb.agent: number of trials per (location, model) combination.
sweeps = 20

# (spatial scale, location) combinations to tune on.
# Move an entry out of the "disabled" list below to include it.
scale_location_pairs = (
    ("1_county", "Los_Angeles"),
)
# Disabled:
#   ('1_county', 'Sacramento'), ('1_county', 'New_York'),
#   ('2_town', 'town_0'), ('2_town', 'town_1'), ('2_town', 'town_2'),
#   ('3_village', 'village_0'), ('3_village', 'village_1'), ('3_village', 'village_2'),
#   ('4_neighborhood', 'neighborhood_0'), ('4_neighborhood', 'neighborhood_1'),
#   ('4_neighborhood', 'neighborhood_2'),
#   ('5_building', 'building_0'), ('5_building', 'building_1'), ('5_building', 'building_2'),

# Model keys to tune; each one needs a matching sweep_configurations/config_sweep_<key>.json.
models = [
    "xgb",
]
# Disabled: 'rf', 'gru', 'lgbm', 'nbeats', 'tft'


for scale, location in scale_location_pairs:

    for model in models:
        # Base settings for this combination. This must stay a notebook-level name:
        # train_eval_tuning() reads `config_run` when the sweep agent calls it.
        config_run = dict(
            spatial_scale=scale,
            temp_resolution=60,
            location=location,
            model=model,
            horizon_in_hours=24,
            lookback_in_hours=24,
            boxcox=True,
            liklihood=None,
            weather=True,
            holiday=True,
            datetime_encodings=False,
        )

        # Load the sweep definition stored for this model type.
        sweep_path = f'sweep_configurations/config_sweep_{model}.json'
        with open(sweep_path) as config_file:
            sweep_config = json.load(config_file)

        # Sweep name: <model>sweep<scale>_<location>_<resolution>
        sweep_config['name'] = (
            f"{model}sweep{config_run['spatial_scale']}"
            f"_{config_run['location']}_{config_run['temp_resolution']}"
        )

        # Register the sweep, then run `sweeps` trials of it in this process.
        sweep_id = wandb.sweep(sweep_config, project="WattCast_tuning")
        wandb.agent(sweep_id, train_eval_tuning, count=sweeps)


### Debugging

In [2]:
# Fixed single combination for stepping through the pipeline by hand.
scale, location, model = '1_county', 'Los_Angeles', 'xgb'

# Same keys and values as the dict built inside the sweep loop.
config_run = dict(
    spatial_scale=scale,
    temp_resolution=60,
    location=location,
    model=model,
    horizon_in_hours=24,
    lookback_in_hours=24,
    boxcox=True,
    liklihood=None,
    weather=True,
    holiday=True,
    datetime_encodings=False,
)

In [5]:
from train import Config

# Outside a W&B run there is no wandb.config, so build the config object
# that data_pipeline() receives from the plain dict instead.
empty_config = Config()
config = empty_config.from_dict(config_run)

In [7]:
# Run the project data pipeline once and keep all ten returned objects under their own names.
(
    pipeline,
    ts_train_piped,
    ts_val_piped,
    ts_test_piped,
    ts_train_weather_piped,
    ts_val_weather_piped,
    ts_test_weather_piped,
    trg_train_inversed,
    trg_val_inversed,
    trg_test_inversed,
) = data_pipeline(config)

In [20]:
# Build the validation frame here rather than relying on a later cell having been run first:
# on a fresh kernel, run top to bottom, `df_val` would not exist yet at this point.
df_val = pd.concat([ts.pd_dataframe() for ts in ts_val_piped], axis=0)
df_val

component,LDWP_load_GW
datetime,Unnamed: 1_level_1
2017-01-01 00:00:00,0.577518
2017-01-01 01:00:00,0.503271
2017-01-01 02:00:00,0.409643
2017-01-01 03:00:00,0.334178
2017-01-01 04:00:00,0.265506
...,...
2017-02-01 19:00:00,1.026357
2017-02-01 20:00:00,1.005094
2017-02-01 21:00:00,0.914752
2017-02-01 22:00:00,0.815764


In [22]:
# Stitch each split's list of piped series into one frame (rows stacked in list order).
df_train = pd.concat([series.pd_dataframe() for series in ts_train_piped], axis=0)
df_val = pd.concat([series.pd_dataframe() for series in ts_val_piped], axis=0)
df_test = pd.concat([series.pd_dataframe() for series in ts_test_piped], axis=0)

# Train is the base line plot; val and test are overlaid as extra traces.
fig = px.line(df_train)
for split_label, split_frame in (('val', df_val), ('test', df_test)):
    fig.add_scatter(x=split_frame.index, y=split_frame['LDWP_load_GW'], name=split_label)

fig.show()

In [46]:
# Read the three stored target splits for Los_Angeles at 60 min directly from the HDF5 file,
# bypassing data_pipeline().
h5_store = 'data/clean_data/1_county.h5'
train = pd.read_hdf(h5_store, key='Los_Angeles/60min/train_target')
val = pd.read_hdf(h5_store, key='Los_Angeles/60min/val_target')
test = pd.read_hdf(h5_store, key='Los_Angeles/60min/test_target')

# Same layout as the piped plot: train as the base line, val and test overlaid.
fig = px.line(train)
for split_label, split_frame in (('val', val), ('test', test)):
    fig.add_scatter(x=split_frame.index, y=split_frame['LDWP_load_GW'], name=split_label)

fig.show()


In [92]:
# `from darts.<sub> import X` does not bind the name `darts`, so import the package
# explicitly for the `darts.TimeSeries` calls below.
import darts
from darts.dataprocessing import Pipeline
from darts.dataprocessing.transformers.boxcox import BoxCox
from darts.dataprocessing.transformers.scaler import Scaler

# NOTE(review): `extract_subseries` and `MinMaxScaler` are not imported in this notebook by
# name; presumably they arrive through the star imports in the first cell -- confirm.

# Rebuild the preprocessing by hand from the raw frames read from the HDF5 file.
# `config.temp_resolution` is in minutes, giving a pandas frequency string such as "60min".
series_freq = str(config.temp_resolution) + "min"

# into darts format, then split each series with extract_subseries (result is indexed below)
ts_train = extract_subseries(
    darts.TimeSeries.from_dataframe(train, freq=series_freq)  # type: ignore
)
ts_val = extract_subseries(
    darts.TimeSeries.from_dataframe(val, freq=series_freq)  # type: ignore
)
ts_test = extract_subseries(
    darts.TimeSeries.from_dataframe(test, freq=series_freq)  # type: ignore
)

# Preprocessing Pipeline: Box-Cox transform followed by min-max scaling.
# Missing values have been filled in 'data_prep.ipynb'.
pipeline = Pipeline(
    [
        BoxCox(),
        Scaler(MinMaxScaler()),
    ]
)

# Fit on a single training sub-series and apply the fitted pipeline to one val and one test
# sub-series. From here on ts_*_piped are single series, no longer the lists that
# data_pipeline() returned.
# NOTE(review): train uses sub-series index 1 while val/test use index 0 -- confirm intended.
ts_train_piped = pipeline.fit_transform(ts_train[1])
ts_val_piped = pipeline.transform(ts_val[0])
ts_test_piped = pipeline.transform(ts_test[0])

In [86]:
def _piped_to_frame(piped):
    """Return one DataFrame for a single series or for an iterable of series.

    An object that exposes ``pd_dataframe`` is converted directly. Anything else
    is treated as an iterable of such objects, whose frames are stacked row-wise.
    """
    if hasattr(piped, "pd_dataframe"):
        return piped.pd_dataframe()
    return pd.concat([ts.pd_dataframe() for ts in piped], axis=0)


# After the manual pipeline cell, ts_*_piped are single series rather than lists, so looping
# over them with a list comprehension is the wrong shape (the recorded run of this cell was
# interrupted). The helper accepts both forms, so the cell works in either order.
df_train = _piped_to_frame(ts_train_piped)
df_val = _piped_to_frame(ts_val_piped)
df_test = _piped_to_frame(ts_test_piped)

# Train is the base line plot; val and test are overlaid as extra traces.
fig = px.line(df_train)

fig.add_scatter(x=df_val.index, y=df_val['LDWP_load_GW'], name='val')

fig.add_scatter(x=df_test.index, y=df_test['LDWP_load_GW'], name='test')

fig.show()

KeyboardInterrupt: 

In [93]:
# Each ts_*_piped is a single transformed series here, so convert it directly.
df_train, df_val, df_test = (
    ts_train_piped.pd_dataframe(),
    ts_val_piped.pd_dataframe(),
    ts_test_piped.pd_dataframe(),
)

# Train is the base line plot; val and test are overlaid as extra traces.
fig = px.line(df_train)
for split_label, split_frame in (('val', df_val), ('test', df_test)):
    fig.add_scatter(x=split_frame.index, y=split_frame['LDWP_load_GW'], name=split_label)

fig.show()

In [100]:
# Untransformed training sub-series that the manual pipeline was fitted on.
train_subseries_frame = ts_train[1].pd_dataframe()
px.line(train_subseries_frame)