In [61]:
#%matplotlib qt
#%matplotlib notebook
%matplotlib inline
import numpy as np
import pandas as pd
from keras.callbacks import TensorBoard, History, EarlyStopping
from keras.layers import Input, Dense, Dropout, LeakyReLU, Flatten
from keras.layers.convolutional import Conv1D
from keras.utils import plot_model
from keras.models import Model, Sequential
from pathlib import Path
import sys
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
pd.options.mode.chained_assignment = None  # default='warn'
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
import math
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
from evaluation import *

In [76]:
# ---------------------------------------------------------------------------
# Reproducibility
# ---------------------------------------------------------------------------
# Seeds NumPy's global random generator only.
np.random.seed(13)

# ---------------------------------------------------------------------------
# Data description
# ---------------------------------------------------------------------------
# One CSV file per tilt angle is read from data_dir (named "<tilt>.csv").
tilt_angles = [0, 10, 15, 18, 21, 25, 30, 35, 40, 45, 60, 90]
# Author's open question kept from the original: "time?" (add a time feature?)
features = [
    'GHI', 'DHI', 'BHI', 'Tamb', 'POA_ISE',
    'Rain', 'Tilt Angle', 'Ws', 'Wd',
]
target = ['Pmpp']
# Number of past rows in each input window (the window also holds the current row).
timesteps = 12
# Network input shape: (rows per window, columns per row). The extra +1 column
# matches the 'forecast_horizon' column added during preprocessing.
shape = (timesteps + 1, len(features) + len(target) + 1)

data_dir = './data/bifacial Oct2017-Oct2018/'
dir_ = './test_results_bifacial/'
set_dir(dir_)

# ---------------------------------------------------------------------------
# Network parameters
# ---------------------------------------------------------------------------
num_layers = 5      # dilated Conv1D layers stacked after the first Conv1D layer
num_neurons = 10    # units in each hidden Dense layer
kernel_size = 2     # Conv1D kernel size
filter_size = 32    # number of Conv1D filters
act_fct = 'relu'    # activation of the hidden layers
out_act = 'linear'  # not referenced by the active (non-commented) model code
loss_fct = 'mae'
optim = 'adam'
metrics = []
history = History()  # History instance handed to draw_history() in a later cell

# ---------------------------------------------------------------------------
# Training parameters
# ---------------------------------------------------------------------------
tensorboard = False
callbacks = [EarlyStopping(patience=7, restore_best_weights=True)]
shuffle = True
epochs = 100
batch_size = 100
val_split = 0.1

output directory set to ./test_results_bifacial/


In [45]:
#dfs = list()
#for tilt in tilt_angles:
#    df = pd.read_csv(data_dir + str(tilt) + '.csv', skipinitialspace=True, sep=';').set_index('TimeStamp')
#    df = df[target + features]
#    df['Tilt'] = df['Tilt Angle']

    # delete erroneous values
#    df = df.drop(df['2018-09-12 10:00:00':'2018-09-17 09:00:00'].index)###################
    
#    for i in range(1, timesteps + 1):
#        for feature in target + features:
#            df[feature + ' t-' + str(i)] = df.shift(i)[feature]
#       
#    dfs.append(df.dropna().reset_index().set_index(['TimeStamp', 'Tilt']))

#dataset = pd.concat(dfs).sort_index()

#train, test = dataset[:('2018-08-16 11:00:00', 0.0)], dataset[('2018-08-16 12:00:00', 0.0):('2018-10-31 16:00:00', 90.0)]
#trainX, trainY = train.iloc[:,len(target):], train.iloc[:,:len(target)]
#testX, testY = test.iloc[:,len(target):], test.iloc[:,:len(target)]

In [77]:
# Path (without extension) of the cached, windowed dataset for this window length.
fname = dir_ + 'data_step' + str(timesteps)


def _interleave(per_tilt):
    """Merge equally shaped per-tilt arrays into one array, alternating tilts.

    The arrays are stacked on a new axis 1 and the first two axes are then
    flattened, so the result is ordered: sample 0 of every tilt, sample 1 of
    every tilt, and so on. ``np.stack`` raises ``ValueError`` when the
    per-tilt arrays do not all share the same shape.
    """
    stacked = np.stack(per_tilt, axis=1)
    return stacked.reshape((stacked.shape[0] * stacked.shape[1],) + stacked.shape[2:])


if Path(fname + '.npz').exists():
    print('Loading preprocessed dataset ...')
    # idx is built from (TimeStamp, Tilt) index tuples and is therefore stored
    # as a pickled object array. NumPy >= 1.16.3 refuses to load such arrays
    # unless allow_pickle=True. The cache is written by this cell itself; do
    # not point fname at files from an untrusted source.
    with np.load(fname + '.npz', allow_pickle=True) as datafile:
        trainX = datafile['trainX']
        trainY = datafile['trainY']
        testX = datafile['testX']
        testY = datafile['testY']
        idx = datafile['idx']
else:
    print('Data preprocessing ...')
    trainXs, trainYs, testXs, testYs, idxs = [], [], [], [], []

    for tilt in tilt_angles:
        df = pd.read_csv(data_dir + str(tilt) + '.csv', skipinitialspace=True, sep=';').set_index('TimeStamp')
        df = df[target + features]
        # Copy of the tilt angle that becomes the second index level below.
        df['Tilt'] = df['Tilt Angle']

        # delete erroneous values
        df = df.drop(df['2018-09-12 10:00:00':'2018-09-17 09:00:00'].index)
        df['forecast_horizon'] = 0
        # Column order matters: the target columns come last so they can be
        # masked by position in the window loop.
        df = df[features + ['Tilt', 'forecast_horizon'] + target].dropna().reset_index().set_index(['TimeStamp', 'Tilt'])

        # Sliding windows of timesteps + 1 consecutive rows. In the last row of
        # each window the target columns are overwritten with -1, so the value
        # to be predicted is not part of the input.
        x = []
        for i in range(timesteps + 1, len(df) + 1):
            d = df.iloc[i - timesteps - 1:i].copy()
            d.iloc[-1, -len(target):] = -1
            x.append(d.values)
        x = np.array(x)
        # Target of window k is the target value of that window's last row.
        y = df[target].iloc[timesteps:]

        # Number of leading windows assigned to the training set.
        split = df[:'2018-09-29 07:00:00'].iloc[timesteps+1:].shape[0]
        trainX, testX = x[:split], x[split:]
        trainY, testY = y.iloc[:split].values, y.iloc[split:].values
        idx = y.iloc[split:].index

        trainXs.append(trainX)
        trainYs.append(trainY)
        testXs.append(testX)
        testYs.append(testY)
        idxs.append(idx)

    # Combine the tilts; requires the same number of samples for every tilt.
    trainY = _interleave(trainYs)
    trainX = _interleave(trainXs)
    testY = _interleave(testYs)
    testX = _interleave(testXs)
    idx = _interleave(idxs)

    np.savez(fname, trainX=trainX, trainY=trainY, testX=testX, testY=testY, idx=idx)
    print('Saved to ' + fname + '.npz       ')
print('Preprocessing done.')

Loading preprocessed dataset ...
Preprocessing done.


In [78]:
# Keyword arguments shared by every convolutional / hidden dense layer.
conv_kwargs = dict(activation=act_fct, padding='causal', kernel_initializer='he_uniform')
dense_kwargs = dict(activation=act_fct, kernel_initializer='he_uniform')

# Causal Conv1D stack: the first layer uses dilation 1, the following
# num_layers layers double the dilation each time (2, 4, 8, ...).
net_layers = [Conv1D(filter_size, kernel_size, input_shape=shape, dilation_rate=1, **conv_kwargs)]
net_layers += [
    Conv1D(filter_size, kernel_size, dilation_rate=2 ** (depth + 1), **conv_kwargs)
    for depth in range(num_layers)
]

# Fully connected head: three hidden Dense layers, then one output unit per
# target followed by a LeakyReLU with a very small negative slope.
net_layers.append(Flatten())
net_layers += [Dense(num_neurons, **dense_kwargs) for _ in range(3)]
net_layers += [Dense(len(target)), LeakyReLU(alpha=0.001)]

model = Sequential(net_layers)
model.compile(loss=loss_fct, optimizer=optim, metrics=metrics)

In [None]:
# Work on a copy of the configured callbacks so that re-running this cell does
# not keep appending TensorBoard instances to the shared `callbacks` list.
fit_callbacks = list(callbacks)
if tensorboard:
    print('tensorboard activated')
    fit_callbacks.append(TensorBoard(log_dir='./tensorboard', histogram_freq=1, batch_size=batch_size, write_graph=True, write_grads=True, write_images=False))

# Keep the History object returned by fit(): the bare History() created in the
# configuration cell is never registered as a callback, so it would stay empty
# when it is later passed to draw_history().
history = model.fit(
    trainX,
    trainY,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=val_split,
    shuffle=shuffle,  # apply the configured flag (True is also the Keras default)
    callbacks=fit_callbacks,
    verbose=1,
)

Train on 28177 samples, validate on 3131 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100

In [58]:
# Predict the held-out windows; verbose=1 shows a progress bar.
prediction = model.predict(testX, batch_size=batch_size, verbose=1)



In [59]:
# First target column of the network output and of the ground truth.
n_targets = len(target)
predicted_values = np.array(prediction).reshape([len(prediction), n_targets])[:, 0]
measured_values = np.array(testY).reshape([len(testY), n_targets])[:, 0]

data = pd.DataFrame(
    {'prediction': predicted_values, 'measured': measured_values},
    columns=['prediction', 'measured'],
)
# Index by the stored index tuples, then move the second level into the
# columns so each series can be selected as data[(kind, tilt)].
sample_index = pd.MultiIndex.from_tuples(idx)
data = data.set_index(sample_index).unstack()
data.index = pd.to_datetime(data.index)

horizon = 1
# Keyword windows for the three time-series plots drawn per tilt, in order:
# up to a fixed end, from a fixed start, and over the whole range.
timeseries_windows = (
    {'end': '2018-08-22 12:00:00'},
    {'start': '2018-10-25 08:00:00'},
    {},
)

for tilt in tilt_angles:
    print('\n\nTilt Angle: ' + str(tilt) + '\n')
    name = '+' + str(horizon) + 'h-prediction tilt=' + str(tilt)
    m_col = data[('measured', tilt)]
    p_col = data[('prediction', tilt)]

    walkForwardDailyLoss(m_col, p_col, horizon=name)
    scatter_predictions(m_col, p_col, name)

    rmse = math.sqrt(mean_squared_error(m_col, p_col))
    print('%s test RMSE: %.3f' % (name, rmse))
    draw_boxplot(m_col, p_col, horizon=name)

    for window in timeseries_windows:
        plot_timeseries(m_col, p_col, None, None, name, **window)

# NOTE(review): this call sits outside the loop, so it only sees the values of
# the last tilt angle, and it passes (prediction, measured) whereas every other
# helper here receives (measured, prediction). Confirm both are intended.
draw_histogram(p_col, m_col, name)



Tilt Angle: 0

+1h-prediction tilt=0 test RMSE: 0.745


Tilt Angle: 10

+1h-prediction tilt=10 test RMSE: 0.754


Tilt Angle: 15

+1h-prediction tilt=15 test RMSE: 0.942


Tilt Angle: 18

+1h-prediction tilt=18 test RMSE: 0.719


Tilt Angle: 21

+1h-prediction tilt=21 test RMSE: 0.746


Tilt Angle: 25

+1h-prediction tilt=25 test RMSE: 0.707


Tilt Angle: 30

+1h-prediction tilt=30 test RMSE: 0.749


Tilt Angle: 35

+1h-prediction tilt=35 test RMSE: 0.766


Tilt Angle: 40

+1h-prediction tilt=40 test RMSE: 0.759


Tilt Angle: 45

+1h-prediction tilt=45 test RMSE: 0.745


Tilt Angle: 60

+1h-prediction tilt=60 test RMSE: 0.858


Tilt Angle: 90

+1h-prediction tilt=90 test RMSE: 1.013


In [52]:
# Plot the training history via the project helper imported from `evaluation`.
# NOTE(review): `history` must be the History object that was populated during
# training (returned by, or registered as a callback with, model.fit) - a
# freshly constructed History() holds no data. Confirm before relying on this.
draw_history(history)

In [60]:
# Descriptive statistics for every prediction / measured column.
summary_stats = data.describe()
print(summary_stats)

# Pairwise Pearson correlation between all columns.
pearson_corr = data.corr(method='pearson')
print(pearson_corr)

# Optional export (disabled). Uses dir_, the configured output directory;
# `dir` would refer to the Python builtin.
#data.to_csv(dir_ + 'predictions.csv', encoding='utf-8')

      prediction                                                         \
            0.0        10.0       15.0       18.0       21.0       25.0   
count  30.000000  30.000000  30.000000  30.000000  30.000000  30.000000   
mean   23.784891  23.622103  23.114223  23.150133  22.880228  22.599836   
std    19.531601  19.621101  19.634401  19.433277  19.366890  19.243776   
min     3.265229   2.884106   3.118170   3.033156   3.215443   3.150404   
25%    12.814092  12.676464  12.627223  12.064635  12.055649  11.603865   
50%    16.924003  16.547441  16.088140  16.254501  15.966619  15.843459   
75%    24.707148  24.155625  23.913296  23.712176  23.308506  22.966338   
max    75.286827  75.313782  75.092873  74.243912  74.278244  73.067940   

                                                     ...       measured  \
            30.0       35.0       40.0       45.0    ...           15.0   
count  30.000000  30.000000  30.000000  30.000000    ...      30.000000   
mean   22.148108  21.728