In [1]:
#%matplotlib qt
#%matplotlib notebook
%matplotlib inline
import numpy as np
import pandas as pd
from keras.callbacks import TensorBoard, History
from keras.layers import Input, Dense, Dropout
from keras.utils import plot_model
from keras.models import Model
from pathlib import Path
import sys
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
pd.options.mode.chained_assignment = None  # default='warn'
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
import math
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
from evaluation import *

Using TensorFlow backend.


In [2]:
# fix random seed for reproducibility
np.random.seed(13)

## net params
num_layers = 4
num_neurons = 10
act_fct = 'relu'
out_act = 'linear'
loss_fct = 'mae'
optim = 'adam'
metrics = []
history = History()
tilt_angles = [0, 10, 15, 18, 21, 25, 30, 35, 40, 45, 60, 90]
features = ['GHI', 'DHI', 'BHI', 'Tamb', 'POA_ISE', 'Rain', 'Tilt Angle', 'Ws', 'Wd']# time?
target = ['Pmpp']
timesteps = 3
shape = ((len(features) + len(target)) * (timesteps + 1) - len(target),)

data_dir = './data/bifacial Oct2017-Oct2018/'

## training params
tensorboard = False
shuffle = True
epochs = 50
batch_size = 100
val_split = 1.0/10.0

In [3]:
dfs = list()
for tilt in tilt_angles:
    df = pd.read_csv(data_dir + str(tilt) + '.csv', skipinitialspace=True, sep=';').set_index('TimeStamp')
    df = df[target + features]
    df['Tilt'] = df['Tilt Angle']

    # delete erroneous values
    df = df.drop(df['2018-09-12 10:00:00':'2018-09-17 09:00:00'].index)
    
    for i in range(1, timesteps + 1):
        for feature in target + features:
            df[feature + ' t-' + str(i)] = df.shift(i)[feature]
       
    dfs.append(df.dropna().reset_index().set_index(['TimeStamp', 'Tilt']))

dataset = pd.concat(dfs).sort_index()

train, test = dataset[:('2018-08-16 11:00:00', 0.0)], dataset[('2018-08-16 12:00:00', 0.0):('2018-10-31 16:00:00', 90.0)]
trainX, trainY = train.iloc[:,len(target):], train.iloc[:,:len(target)]
testX, testY = test.iloc[:,len(target):], test.iloc[:,:len(target)]

In [4]:
visible = Input(shape=shape)
dense = visible
for layer in range(0, num_layers):
    dense = Dense(num_neurons, activation=act_fct)(dense)
output = Dense(len(target), activation=out_act)(dense)
model = Model(inputs=visible, outputs=output)
model.compile(loss=loss_fct, optimizer=optim, metrics=metrics)

In [5]:
callbacks = [history]
if tensorboard:
    print('tensorboard activated')
    callbacks.append(TensorBoard(log_dir='./tensorboard', histogram_freq=1, batch_size=batch_size, write_graph=True, write_grads=True, write_images=False))

model.fit(trainX, trainY, batch_size, epochs=epochs, validation_split=val_split, callbacks=callbacks, verbose=1)

Train on 22702 samples, validate on 2523 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x2645725fb70>

In [6]:
prediction = model.predict(testX, batch_size, 1)



In [7]:
data = pd.DataFrame()
data['prediction'] = pd.DataFrame(np.array(prediction).reshape([len(prediction), len(target)])).iloc[:,0]
data['measured'] = pd.DataFrame(np.array(testY).reshape([len(testY), len(target)])).iloc[:,0]
data = data.set_index(pd.MultiIndex.from_tuples(testY.index)).unstack()
data.index = pd.to_datetime(data.index)


for tilt in tilt_angles:
    print('\n\nTilt Angle: ' + str(tilt) + '\n')
    horizon = 1
    name = '+' + str(horizon) + 'h-prediction tilt=' + str(tilt) 
    p_col = data[('prediction', tilt)]#name]
    m_col = data[('measured', tilt)]

    walkForwardDailyLoss(m_col, p_col, horizon=name)
    scatter_predictions(m_col, p_col, name)

    print('%s test RMSE: %.3f' % (name, math.sqrt(mean_squared_error(m_col, p_col))))
    draw_boxplot(m_col, p_col, horizon=name)
    #draw_boxplot_monthly(m_col, p_col, horizon=name)
    m1, m2 = '2018-08-18 10:00:00', '2018-08-18 14:00:00'
    print('%s nice day RMSE: %.3f' % (name, math.sqrt(mean_squared_error(m_col[m1:m2], p_col[m1:m2]))))
    draw_boxplot(m_col, p_col, horizon=name, start=m1, end=m2)

    plot_timeseries(m_col, p_col, None, None, name, end='2018-08-22 12:00:00')
    plot_timeseries(m_col, p_col, None, None, name, start='2018-10-25 08:00:00')
    plot_timeseries(m_col, p_col, None, None, name, start=m1, end=m2)
    plot_timeseries(m_col, p_col, None, None, name)

draw_histogram(p_col, m_col, name)



Tilt Angle: 0

daily mean +1h-prediction tilt=0 RMSE: 4.424604060646228
               0
count  22.000000
mean    4.424604
std     4.718039
min     0.798724
25%     2.989707
50%     3.854646
75%     4.095685
max    24.935792
+1h-prediction tilt=0 test RMSE: 6.415
+1h-prediction tilt=0 nice day RMSE: 4.256


Tilt Angle: 10

daily mean +1h-prediction tilt=10 RMSE: 4.460037234851779
               0
count  22.000000
mean    4.460037
std     5.065137
min     0.947598
25%     2.855047
50%     3.568997
75%     4.343829
max    26.536096
+1h-prediction tilt=10 test RMSE: 6.689
+1h-prediction tilt=10 nice day RMSE: 5.399


Tilt Angle: 15

daily mean +1h-prediction tilt=15 RMSE: 4.639795643230531
               0
count  22.000000
mean    4.639796
std     5.302792
min     0.948174
25%     2.730122
50%     3.425795
75%     5.050703
max    27.510243
+1h-prediction tilt=15 test RMSE: 6.984
+1h-prediction tilt=15 nice day RMSE: 5.587


Tilt Angle: 18

daily mean +1h-prediction tilt=18 RMSE: 4.83377

In [8]:
draw_history(history)

In [9]:
print(data.describe())
print(data.corr(method='pearson'))
#data.to_csv(dir + 'predictions.csv', encoding='utf-8')

       prediction                                                              \
             0.0         10.0        15.0        18.0        21.0        25.0   
count  545.000000  545.000000  545.000000  545.000000  545.000000  545.000000   
mean   133.278046  143.533005  148.227707  150.772598  152.969025  155.351044   
std     71.736992   77.967331   80.950783   82.603271   84.066360   85.720528   
min      1.942676    2.469663    2.004886    2.245284    2.195261    2.152254   
25%     66.337547   69.158875   70.667648   72.991638   72.549393   72.117912   
50%    143.355392  158.602661  163.812149  169.411270  173.583084  177.079422   
75%    193.690643  211.843094  219.422440  224.635941  228.250626  233.113266   
max    283.188995  298.989105  306.872040  312.075775  313.603302  317.565338   

                                                          ...        measured  \
             30.0        35.0        40.0        45.0     ...            15.0   
count  545.000000  545.0000