In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib notebook
import pandas as pd
import tensorflow as tf
from tensorflow.keras import optimizers
import pickle
import sys
sys.path.append("..")
from bayesian_models import Pbnn

Loading the training dataset

In [2]:
# Feature set selected by modelID:
#   0: SCADA only, 1: SCADA+Acc17, 2: SCADA+Acc38, 3: SCADA+Acc77,
#   4: SCADA+Acc17&38, 5: SCADA+Acc17&38&77
modelID = 3
include_wave = 'no'
duration = '24M'
durations = ['3M','6M','9M','12M','15M','18M','21M','24M']
train_end_dates = ['2018-03-31 23:50:00+00:00', '2018-06-30 23:50:00+00:00', '2018-09-30 23:50:00+00:00', 
                  '2018-12-31 23:50:00+00:00', '2019-03-31 23:50:00+00:00', '2019-06-30 23:50:00+00:00',
                  '2019-09-30 23:50:00+00:00', '2019-12-31 23:50:00+00:00']
# durations and train_end_dates are parallel lists; an unknown duration raises ValueError here
train_end_date = train_end_dates[durations.index(duration)]

# Load train data
train_input = pd.read_pickle('../../DATA/train_input')
train_output = pd.read_pickle('../../DATA/train_output')
index = train_input.columns

# Keep only the rows from the start of 2018 up to the selected end date
train_input = train_input.loc['2018-01-01 00:00:00+00:00':train_end_date]
train_output = train_output.loc['2018-01-01 00:00:00+00:00':train_end_date]
# print(train_input.shape)

# Normalization of input data
# Data normalization according to training dataset/ model
# NOTE(review): pickle.load can execute arbitrary code; only load a trusted file here.
with open('../../DATA/Norm', 'rb') as filehandler:  # context manager closes the file handle
    std_scaler = pickle.load(filehandler)
inputn = pd.DataFrame(std_scaler.transform(train_input), columns=train_input.columns)
# inputn is still a dataframe, but with a default numeric index
outputn = train_output/10**5  # change of units, outputn is still a dataframe with time index

# Retrieve features based on the modelID
# Column positions of the accelerometer features for each modelID
# (modelID 0, or any ID not listed, adds no accelerometer columns)
acc_positions = {
    1: [3,4,9,10,15,16],                       # Acc17
    2: [5,6,11,12,17,18],                      # Acc38
    3: [7,8,13,14,19,20],                      # Acc77
    4: [3,4,5,6,9,10,11,12,15,16,17,18],       # Acc17&38
    5: slice(3, 21),                           # Acc17&38&77
}
index1 = pd.Index([])  # create a blank index array
if include_wave == 'yes':
    index1 = index1.append(index[0:3])
if modelID in acc_positions:
    index1 = index1.append(index[acc_positions[modelID]])

index1 = index1.append(index[21:]) # SCADA
X = inputn[index1].values
Y = outputn.values

FileNotFoundError: [Errno 2] No such file or directory: '../../DATA/train_input'

Build the network

In [None]:
# Assemble the model configuration from the shapes of the training arrays.
# Key names are consumed by Pbnn and are kept exactly as written.
config = {}
config["n_infeatures"] = X.shape[1]    # number of columns of X
config["n_outfeatures"] = Y.shape[1]   # number of columns of Y
config["n_samples"] = X.shape[0]       # number of rows of X
config["outout_dist"] = "Normal"
config["learn_all_params"] = True

fl_model = Pbnn(config)
# presumably 3 hidden layers with these widths -- TODO confirm against Pbnn.build_bnn
layer_sizes = [32,64,32]
fl_model.build_bnn(3, layer_sizes)

Train the network

In [None]:
# Training settings handed to Pbnn.train_bnn; the optimizer is passed as a
# class (not an instance) together with its learning rate.
train_env = dict([
    ("optimizer", optimizers.Adam),
    ("learning_rate", 0.0002),
    ("batch_size", 1024),
    ("epochs", 2000),
    ("callback_patience", 30),
    ("verbose", 1),
])
fl_model.train_bnn(X, Y, train_env)

# Save the weights (disabled)
# with open("../../DATA/Model%d_IncWave%s_%s.h5" % (modelID, include_wave, duration), "wb") as fp:   #Pickling
#     pickle.dump(fl_model.weights, fp)
# with open("../../DATA/Model%d_IncWave%s_%s.h5" % (modelID, include_wave, duration),  "rb") as fp:   # Unpickling
#     b = pickle.load(fp)

Loading the test dataset

In [None]:
# Load test data
test_input = pd.read_pickle('../../DATA/test_input')
test_output = pd.read_pickle('../../DATA/test_output')
index = test_input.columns
# Sort in order of increasing wind speed
# test_input = test_input.sort_values(by=['mean_BB_G10_windspeed'])
# test_input = test_input.drop_duplicates(subset=['mean_BB_G10_windspeed'], keep='last')
# test_output = test_output.reindex(test_input.index)

# Normalization of input data
# Data normalization according to training dataset/ model
# NOTE(review): pickle.load can execute arbitrary code; only load a trusted file here.
with open('../../DATA/Norm', 'rb') as filehandler:  # context manager closes the file handle
    std_scaler = pickle.load(filehandler)
inputn = pd.DataFrame(std_scaler.transform(test_input), columns=test_input.columns)
# inputn is still a dataframe, but with a default numeric index
outputn = test_output/10**5 # change of units, outputn is still a dataframe with time index

# Retrieve features based on the modelID (modelID and include_wave come from the training cell)
# Column positions of the accelerometer features for each modelID
# (modelID 0, or any ID not listed, adds no accelerometer columns)
acc_positions = {
    1: [3,4,9,10,15,16],                       # Acc17
    2: [5,6,11,12,17,18],                      # Acc38
    3: [7,8,13,14,19,20],                      # Acc77
    4: [3,4,5,6,9,10,11,12,15,16,17,18],       # Acc17&38
    5: slice(3, 21),                           # Acc17&38&77
}
index1 = pd.Index([])  # create a blank index array
if include_wave == 'yes':
    index1 = index1.append(index[0:3])
if modelID in acc_positions:
    index1 = index1.append(index[acc_positions[modelID]])

index1 = index1.append(index[21:]) # SCADA
Xtest = inputn[index1].values
Ytest = outputn.values

Evaluate the network output with respect to the test labels

In [None]:
# Score the test set with 100 simulations, then average the result over axis 0.
# NOTE(review): "ELL" presumably stands for expected log-likelihood -- confirm in Pbnn.evaluate_bnn
ELL_Ytest = fl_model.evaluate_bnn(Xtest, Ytest, nsim=100)
mean_ELL = np.mean(ELL_Ytest, axis=0)
print(mean_ELL)

Plot some test samples

In [None]:
# Take test rows 1..49 (row 0 is not included) and get the predictive mean/std
Xt = Xtest[1:50,:]
Yt = Ytest[1:50,:]
Mean_Y, Stdv_Y = fl_model.test_bnn(Xt, nsim=100)

cm = 1/2.54  # centimetres to inches, since figsize is given in inches
x = np.arange(Xt.shape[0])
fig, ax = plt.subplots(2, figsize=(17*cm, 8*cm), sharey='row', dpi=80, facecolor='w', edgecolor='k')
plt.subplots_adjust(left=0.1, right=.98, top=0.98, bottom=0.15, hspace = 0.1, wspace=0.15)

# One panel per output column; the panels differ only in column index and y-label
panel_ylabels = ['DEM$_{tl}$', 'DEM$_{tn}$']
for k, ylabel in enumerate(panel_ylabels):
    panel = ax[k]
    panel.plot(x, Mean_Y[:,k], 'r-', label='Predictive mean')
    panel.scatter(x, Yt[:,k], marker='+', label='Measured')
    # Shaded band: mean +/- 1.96 standard deviations
    panel.fill_between(x, np.squeeze(Mean_Y[:,k]+1.96*Stdv_Y[:,k]), np.squeeze(Mean_Y[:,k] -1.96*Stdv_Y[:,k]),
                       alpha=0.5, label='95% CI (+/- 1.96std)')
    panel.set_yticklabels([])
    panel.set_xticklabels([])
    panel.set_ylabel(ylabel)
    panel.legend(ncol=3)

Quantify model uncertainty

In [None]:
# Run modeluq_bnn on the full test set with 100 simulations and unpack its four outputs.
# NOTE(review): names suggest mean/std of the predicted mu and sigma -- confirm in Pbnn.modeluq_bnn
uq_outputs = fl_model.modeluq_bnn(Xtest, nsim=100)
Mean_muY, Stdv_muY, Mean_sigmaY, Stdv_sigmaY = uq_outputs

Compute percentage error between predicted means and labels

In [None]:
# Absolute percentage error of the predicted mean against each test label.
# The denominator uses |Ytest| so a negative label cannot yield a negative
# "error" that cancels out in the mean. Labels equal to zero still give inf/nan.
Percentage_error = 100*np.abs(Mean_muY-Ytest)/np.abs(Ytest)
# Mean over axis 0, i.e. one value per output column
print(np.mean(Percentage_error, axis=0))