## Hierarchical Forecasting with Different Levels of Data

In [1]:
import pandas as pd
import sys
sys.path.insert(0, '../')

import constants as const
import src.utils as util
import src.calculate_errors as err

import plotly.graph_objs as go
from plotly.subplots import make_subplots
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)

In [2]:
def calculate_grid_error(model_path):
    """Compute the grid-level forecast error for one model's saved results.

    Reads the grid series from ``../ts_data/grid.csv`` to obtain the training
    split, then reads the stored results from ``../{model_path}/grid.csv`` and
    hands both to ``err.test_errors_nrmse``.

    Args:
        model_path: Results directory of the model, relative to the parent
            of the working directory.

    Returns:
        The ``(mean_err, error_dist)`` pair produced by
        ``err.test_errors_nrmse``.
    """
    horizon = 14
    look_back = horizon * 7  # 14 * 7, the window length given to the split helper

    grid_series = pd.read_csv('../ts_data/grid.csv', index_col=[0])
    # Only the training portion is used here; the other two splits are discarded.
    train, _, _ = util.split_hourly_data(grid_series, look_back)
    train_power = train[['power']]

    results = pd.read_csv(f'../{model_path}/grid.csv', index_col=[0])
    actuals = results['power'].values
    predicted = results['fc'].values

    mean_err, error_dist = err.test_errors_nrmse(
        train_power.values, actuals, predicted, horizon
    )
    return mean_err, error_dist

In [3]:
def read_loss_curve(path):
    """Load the pickled training-loss record saved for a model.

    Args:
        path: Results directory of the model, relative to the parent of the
            working directory.

    Returns:
        The object ``pd.read_pickle`` restores from
        ``../{path}/training_loss_grid_iteration``.
    """
    # NOTE(review): unpickling can execute arbitrary code, so this should only
    # be pointed at files produced by trusted training runs.
    loss_file = f'../{path}/training_loss_grid_iteration'
    return pd.read_pickle(loss_file)

def plot_loss(loss_array, models):
    """Draw training and validation loss curves, one subplot row per model.

    Args:
        loss_array: Sequence of loss records; each one is indexed with
            ``'loss'`` and ``'val_loss'``.
        models: Display names, matched to ``loss_array`` by position.
    """
    n_rows = len(loss_array)
    fig = make_subplots(rows=n_rows, shared_xaxes=True)

    # Subplot rows are 1-based, hence start=1.
    for row, history in enumerate(loss_array, start=1):
        train_curve = go.Scatter(y=history['loss'], name=f'{models[row - 1]}_train-loss')
        fig.add_trace(train_curve, row=row, col=1)
        val_curve = go.Scatter(y=history['val_loss'], name=f'{models[row - 1]}_val-loss')
        fig.add_trace(val_curve, row=row, col=1)

    # Only the bottom row carries the x-axis title.
    fig.update_xaxes(title="Epochs", row=n_rows, col=1)
    fig.show()
    
def show_loss(path_list, model_names):
    """Load the loss record stored under each path and plot them together.

    Args:
        path_list: Model result directories, each passed to
            ``read_loss_curve``.
        model_names: Display names forwarded to ``plot_loss``.
    """
    plot_loss([read_loss_curve(model_dir) for model_dir in path_list], model_names)

## Model 1 - Architecture

<img src="../images/New_images/model1.png">

In [4]:
# Results directory for Model 1; calculate_grid_error reads ../{path1}/grid.csv.
path1 = 'combined_nn_results/refined_models/model1'
# Mean error and error distribution, as returned by calculate_grid_error.
rmse1, dist1 = calculate_grid_error(path1)

In [5]:
# Display Model 1's mean error (computed by err.test_errors_nrmse, despite the "rmse" name).
rmse1

0.4689518104839537

## Model 2 - Architecture

<img src="../images/new_images/model2.png">

In [6]:
# Results directory for Model 2; calculate_grid_error reads ../{path2}/grid.csv.
path2 = 'combined_nn_results/refined_models/model2'
# Mean error and error distribution, as returned by calculate_grid_error.
rmse2, dist2 = calculate_grid_error(path2)

In [7]:
# Display Model 2's mean error (computed by err.test_errors_nrmse, despite the "rmse" name).
rmse2

0.4851028450736508

## Model 3 - Architecture

<img src="../images/new_images/model3.png">

In [8]:
# Model 3 variant: dense layer with 14 outputs from each branch.
# Results directory for Model 3; calculate_grid_error reads ../{path3}/grid.csv.
path3 = 'combined_nn_results/refined_models/model3'
# Mean error and error distribution, as returned by calculate_grid_error.
rmse3, dist3 = calculate_grid_error(path3)

In [9]:
# Display Model 3's mean error (computed by err.test_errors_nrmse, despite the "rmse" name).
rmse3

0.40207853214251055

## Model 4 - Architecture 

<img src="../images/new_images/model4.png">

In [10]:
# Results directory for Model 4; calculate_grid_error reads ../{path4}/grid.csv.
path4 = 'combined_nn_results/refined_models/model4'
# Mean error and error distribution, as returned by calculate_grid_error.
rmse4, dist4 = calculate_grid_error(path4)

In [11]:
# Display Model 4's mean error (computed by err.test_errors_nrmse, despite the "rmse" name).
rmse4

0.48270205794177873

## Model 5 - Architecture

<img src="../images/new_images/model5.png">

In [13]:
# Results directory for Model 5; calculate_grid_error reads ../{path5}/grid.csv.
path5 = 'combined_nn_results/refined_models/model5'
# Mean error and error distribution, as returned by calculate_grid_error.
rmse5, dist5 = calculate_grid_error(path5)

In [14]:
# Display Model 5's mean error (computed by err.test_errors_nrmse, despite the "rmse" name).
rmse5

0.4746874638641852

## Loss Curves

In [16]:
# Plot the training and validation loss of all five models, one subplot row per model.
show_loss([path1, path2, path3, path4, path5], ['model 1', 'model 2', 'model 3', 'model 4', 'model 5'])

## Plot the forecasts

In [17]:
def change_df(dataframe):
    """Return an hourly-averaged copy of *dataframe* indexed by timestamp.

    The ``date_str`` values (taken from the index or from a column of that
    name) are parsed to datetimes and installed as the index before the
    frame is resampled.

    Args:
        dataframe: Frame whose ``date_str`` index or column holds parseable
            timestamps. It is left unmodified.

    Returns:
        A new DataFrame resampled to one-hour bins, holding the mean of the
        rows that fall into each bin.
    """
    timestamps = pd.to_datetime(dataframe.reset_index()['date_str'])
    # Work on a copy so the caller's frame keeps its original index.
    indexed = dataframe.copy()
    indexed.index = timestamps
    # Lowercase "h" is the hourly alias; uppercase "H" is deprecated in recent pandas.
    return indexed.resample("1h").mean()

In [18]:
# ARIMA benchmark results for the grid series, averaged to hourly resolution.
arima = change_df(pd.read_csv(f'../benchmark_results/arima/grid.csv', index_col=[0]))

In [19]:
# TBATS benchmark results for the grid series, averaged to hourly resolution.
tbats = change_df(pd.read_csv(f'../benchmark_results/tbats/grid.csv', index_col=[0]))

In [20]:
# Model 1 grid results, averaged to hourly resolution.
model1_fc = change_df(pd.read_csv(f'../{path1}/grid.csv', index_col=[0]))

In [21]:
# Model 2 grid results, averaged to hourly resolution.
model2_fc = change_df(pd.read_csv(f'../{path2}/grid.csv', index_col=[0]))

In [22]:
# Model 3 grid results, averaged to hourly resolution.
model3_fc = change_df(pd.read_csv(f'../{path3}/grid.csv', index_col=[0]))

In [23]:
# Model 4 grid results, averaged to hourly resolution.
model4_fc = change_df(pd.read_csv(f'../{path4}/grid.csv', index_col=[0]))

In [24]:
# Model 5 grid results, averaged to hourly resolution.
model5_fc = change_df(pd.read_csv(f'../{path5}/grid.csv', index_col=[0]))

In [25]:
# Results directory of the CNN run stored under cnn_results.
path_cnn = 'cnn_results/tcn_new/dilation_2/final_results'

# Its grid results, averaged to hourly resolution.
cnn_uni = change_df(pd.read_csv(f'../{path_cnn}/grid.csv', index_col=[0]))

In [26]:
# Overlay the observed series with the ARIMA benchmark and selected model
# forecasts. Every trace is drawn against model1_fc's index.
# NOTE(review): assumes all frames cover the same hours as model1_fc — confirm.
fig = go.Figure()
fig.add_traces([
    go.Scatter(x=model1_fc.index, y=frame[column], name=label)
    for frame, column, label in (
        (model1_fc, 'power', 'target generation'),
        (arima, 'average_fc', 'arima'),
        (model1_fc, 'fc', 'Combine Grid+PC CNN forecasts'),
        (model3_fc, 'fc', 'Combine PC NN forecasts'),
        (model4_fc, 'fc', 'Combine Grid+PC NN forecasts'),
        (model5_fc, 'fc', 'Combine PC NN Dense forecasts'),
    )
])

fig.update_yaxes(title="Generation (kW)")
fig.show()
# Also save an HTML copy of the figure in the working directory.
fig.write_html('forecasts.html')

## Let's have a look at the data scaling

In [68]:
from sklearn.preprocessing import StandardScaler
def create_window_data(file_index, lookback):
    """Return the standardized test split of one time series.

    The series named ``const.TS[file_index]`` is read from ``../ts_data``,
    split with ``util.split_hourly_data`` and scaled with a
    ``StandardScaler`` fitted on the training split only. The series name is
    printed as a progress indicator.

    Args:
        file_index: Position of the series name in ``const.TS``.
        lookback: Multiplier applied to 14 to obtain the look-back length
            passed to the split helper.

    Returns:
        A DataFrame holding the scaled test split, with the source file's
        columns and a fresh default index.
    """
    filename = const.TS[file_index]
    data = pd.read_csv(f'../ts_data/{filename}.csv', index_col=[0])
    print(filename)

    # presumably 14 hourly steps per day times `lookback` days — confirm
    look_back = 14 * lookback

    # Only train (to fit the scaler) and test (the returned split) are needed.
    train, _, test = util.split_hourly_data(data, look_back)

    # Fit on the training data alone so the test split is scaled with
    # statistics that do not include it.
    scaler = StandardScaler()
    scaler.fit(train.values)

    return pd.DataFrame(scaler.transform(test.values), columns=data.columns)

In [69]:
lookback = 7

# Scaled test split for the grid series and six further series; the integers
# are positions in const.TS and are processed in the order listed.
(
    window_grid,
    window_pc_6010,
    window_pc_6014,
    window_pc_6011,
    window_pc_6280,
    window_pc_6281,
    window_pc_6284,
) = (create_window_data(ts_index, lookback) for ts_index in (0, 7, 8, 9, 10, 11, 12))

grid
6010
6014
6011
6280
6281
6284


In [70]:
# window_pc_6010

In [71]:
# Compare the scaled 'power' column of the grid series with that of the six
# other series, one trace per series.
fig = go.Figure()
fig.add_traces([
    go.Scatter(y=window['power'], name=label)
    for label, window in (
        ('grid_data', window_grid),
        ('pc_6010_data', window_pc_6010),
        ('pc_6014_data', window_pc_6014),
        ('pc_6011_data', window_pc_6011),
        ('pc_6280_data', window_pc_6280),
        ('pc_6281_data', window_pc_6281),
        ('pc_6284_data', window_pc_6284),
    )
])