Imports
==============================================================================================

In [None]:
%matplotlib inline
import numpy as np
import pandas as pd
from keras.callbacks import TensorBoard, History, EarlyStopping, ModelCheckpoint
from keras.layers import Input, Dense, Dropout
from keras.utils import plot_model
from keras.models import Model
from keras.optimizers import Adam
from pathlib import Path
import sys
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
pd.options.mode.chained_assignment = None  # default='warn'
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
import math
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
from evaluation import *
from machineLearningModel import *
from statsmodels.graphics.tsaplots import plot_acf

Configuration
==============================================================================================

In [None]:
# Seed NumPy's global RNG so repeated runs are reproducible.
np.random.seed(13)

## Network hyper-parameters
# ("tried" lists the values left over from earlier experiments.)
num_layers = 0      # tried: 2, 1, 4, 3, 6
num_neurons = 50    # tried: 10, 300, 100, 500
batch_size = 1024   # tried: 128, 500, 1000
dropout_rate = 0

# Per-system constant descriptors (14 columns; 'SystemID' is currently excluded).
const_features = [
    'latitude', 'longitude', 'altitude',
    'modules_per_string', 'strings_per_inverter',
    'tilt', 'azimuth', 'albedo',
    'Technology', 'BIPV',
    'A_c', 'N_s', 'pdc0', 'gamma_pdc',
]

# Time-varying inputs (20 columns).
dyn_features = [
    'Wind Direction_x', 'Wind Direction_y',
    'Total Cloud Cover', 'Low Cloud Cover', 'Medium Cloud Cover', 'High Cloud Cover',
    'Wind Speed', 'Wind Gust',
    'Total Precipitation', 'Snow Fraction', 'Mean Sea Level Pressure',
    'DIF - backwards', 'DNI - backwards', 'Shortwave Radiation',
    'Temperature', 'Relative Humidity',
    'Hour_x', 'Hour_y', 'Month_x', 'Month_y',
]

# Reduced feature sets used in earlier experiments:
#const_features = ['latitude', 'longitude']#'SystemID']
#dyn_features = ['DIF - backwards', 'DNI - backwards', 'Shortwave Radiation', 'Hour_x', 'Hour_y', 'Month_x', 'Month_y']#, 'Temperature', 'Relative Humidity', 'Hour_x', 'Hour_y', 'Month_x', 'Month_y']

target_features = ['power']
drop_features = ['power_pvlib']

act_fct = 'relu'
out_act = 'linear'  # alternatives: linear, relu
loss_fct = 'mae'    # alternatives: mse, mae
optim = 'adam'      # alternative: Adam(lr=0.00001)
metrics = []
# Keras History callbacks: `history` is placed in the `callbacks` list below,
# `val_history` is attached to the model in the walk-forward validation cell.
history = History()
val_history = History()

## data params
# CSV that the preprocessing cell reads and indexes by ('time', 'SystemID').
filename = './data/full_data_5_systems.csv'
correlations = []#'pearson', 'spearman', 'kendall']
# Number of lagged time steps given to the model (trailing values: earlier experiments).
timesteps = 5#24#5#1#12#5#1#3#2#1#5#168#2190#336#72#24#5#24
method = 'dilated' # randfor, mlp, lstm, dilated
# Methods that receive flat feature vectors instead of (time step, feature) windows.
flat = ['randfor', 'mlp']
# True: one target step is predicted per call and fed back as a lag input;
# False: the model outputs `forecast_horizon` values at once.
recursive = True#False
# Number of systems; the preprocessing cell splits the CSV into this many chunks.
num_sys = 5

## Training parameters
pretraining = True
wfvtraining = True      # reassigned in the walk-forward validation cell
tensorboard = False

# Callbacks handed to the model constructors; the best weights are checkpointed to disk.
# (Earlier variants: ModelCheckpoint(..., verbose=1), EarlyStopping(patience=5).)
callbacks = [
    history,
    EarlyStopping(patience=10, restore_best_weights=True),
    ModelCheckpoint('./saved_models/best_model.h5', save_best_only=True),
]

shuffle = False
epochs = 200            # tried: 50, 20
forecast_horizon = 24   # tried: 3, 1, 6
sliding_window = 169    # reassigned in the walk-forward validation cell; tried: 8760, 672, 3, 8, 24, 72, 336
# Root folder for the cached dataset, model plot and per-system evaluation output.
# NOTE: the name shadows the builtin `dir`; it is kept because later cells use it.
dir = './test_results/'

# Create every folder the notebook writes to. './saved_models/' is the target of
# ModelCheckpoint and of the model-saving cells, so it must exist on a fresh run.
# exist_ok=True makes re-running the cell safe without a separate existence check.
for _folder in (dir, './saved_models/'):
    os.makedirs(_folder, exist_ok=True)

# Column counts that the shape formulas are built from.
n_const = len(const_features)
n_dyn = len(dyn_features)
n_target = len(target_features)

# Input shape: flat methods get a 1-D feature vector, the others a
# (time steps, features) window. The trailing "+ 1" in the recursive variants
# accounts for the extra 'forecast_horizon' column added during preprocessing.
is_flat = method in flat
if is_flat and recursive:
    shape = (n_const + n_dyn + timesteps * (n_dyn + n_target) + 1,)
elif is_flat:
    shape = (n_const + timesteps * (n_dyn + n_target) + forecast_horizon * n_dyn,)
elif recursive:
    shape = (timesteps + 1, n_const + n_dyn + n_target + 1)
else:
    # earlier variant:
    #shape = (timesteps, len(const_features + target_features) + len(dyn_features) * (forecast_horizon + timesteps))
    shape = (timesteps + forecast_horizon, n_const + n_dyn + n_target)

# Output size: one value per target when predicting recursively,
# otherwise the whole horizon at once.
out_dim = n_target if recursive else forecast_horizon

Data Preprocessing
==============================================================================================

In [None]:
# The cache file name encodes the window length and the data layout, so each
# (timesteps, flat/sequence, recursive/fixed) combination gets its own file.
suffix1 = '_flat' if method in flat else ''
suffix2 = '' if recursive else '_fixed'
fname = ''.join([dir, 'data_step', str(timesteps), suffix1, suffix2])

# Load the preprocessed arrays from the .npz cache if it exists; otherwise build
# them from the CSV, one system at a time, and write the cache.
if Path(fname + '.npz').exists():
    print('Loading preprocessed dataset ...')
    with np.load(fname + '.npz') as datafile:
        trainX = datafile['trainX']
        trainY = datafile['trainY']
        testX = datafile['testX']
        testY = datafile['testY']
        pvlib = datafile['pvlib']
        idx = datafile['idx']
else:
    print('Data preprocessing ...')
    dataframe = pd.read_csv(filename, skipinitialspace=True).set_index(['time', 'SystemID'])
    # Split row-wise into num_sys chunks.
    # NOTE(review): this assumes the CSV rows are grouped by system and every system
    # has the same number of rows -- confirm against the data file.
    dataframe = np.array_split(dataframe, num_sys)
    pvlibs = []
    trainXs = []
    trainYs = []
    testXs = []
    testYs = []
    idxs = []
    # Systems are processed in reverse order, so position k in the per-system lists
    # holds chunk num_sys-1-k.
    for s in range(num_sys-1, -1, -1):
        df = dataframe[s]
        pvlibs.append(df.power_pvlib)
        dataset = df[const_features + dyn_features + target_features]

        if method in flat:
            if recursive:
                # Append lagged copies (t-1 .. t-timesteps) of every dynamic and target column.
                for i in range(1, timesteps + 1):
                    for feature in dyn_features + target_features:
                        sys.stdout.write("Shifting %1i/%i %24s\r" % (i, timesteps, feature))
                        sys.stdout.flush()
                        dataset[feature + ' t-' + str(i)] = dataset.shift(i)[feature]
                print('Shifting done.                ')

                # Add the 'forecast_horizon' column and move the target columns to the end
                # so that X/Y can be separated by position below.
                dataset['forecast_horizon'] = 0
                p = dataset[target_features]
                dataset = dataset.drop(target_features, axis=1)
                for f in target_features:
                    dataset[f] = p[f]
                dataset = dataset.dropna()

                # Fixed chronological split between training and test period.
                train, test = dataset[:('2015-10-10 23:00:00', s)], dataset[('2015-10-11 00:00:00', s):]
                trainX, trainY = train.iloc[:,:-len(target_features)], train.iloc[:,-len(target_features):]
                testX, testY = test.iloc[:,:-len(target_features)], test.iloc[:,-len(target_features):]
                idx = testX.index.values
            else:
                # NOTE(review): the ' t+i' columns are filled with shift(i), i.e. the same
                # past values as the ' t-i' columns below; a look-ahead would be shift(-i).
                # The range also starts at forecast_horizon and stops at 2 -- confirm intent.
                for i in range(forecast_horizon, 1, -1):
                    for feature in dyn_features:
                        sys.stdout.write("Shifting %1i/%i %24s \r" % (i, forecast_horizon, feature))
                        sys.stdout.flush()
                        dataset[feature + ' t+' + str(i)] = dataset.shift(i)[feature]

                for i in range(1, timesteps + 1):
                    for feature in dyn_features + target_features:
                        sys.stdout.write("Shifting %1i/%i %25s \r" % (i, timesteps, feature))
                        sys.stdout.flush()
                        dataset[feature + ' t-' + str(i)] = dataset.shift(i)[feature]
                print('Shifting done.                                ')

                # NOTE(review): unlike the recursive branch, no dropna() is applied here,
                # so the leading rows keep NaN lag values -- confirm this is intended.
                train, test = dataset[:('2015-10-10 23:00:00', s)], dataset[('2015-10-11 00:00:00', s):]
                trainX = train.drop(target_features, axis=1)
                # Each target row is the flattened window of the next forecast_horizon target values.
                trainY = [train[target_features].iloc[i:i+forecast_horizon].values.flatten() for i in range(len(train))]

                testX = test.drop(target_features, axis=1)
                testY = [test[target_features].iloc[i:i+forecast_horizon].values.flatten() for i in range(len(test))]

                # Windows near the end are shorter than forecast_horizon; pad them with NaN
                # so np.array() can build a rectangular array. The last forecast_horizon
                # rows are dropped right after.
                for f in range(1, forecast_horizon):
                    trainY[-f] = np.pad(trainY[-f], (0, forecast_horizon-f), mode='constant', constant_values=(np.nan,))
                    testY[-f] = np.pad(testY[-f], (0, forecast_horizon-f), mode='constant', constant_values=(np.nan,))
                trainY = np.array(trainY[:-forecast_horizon])
                trainX = trainX[:-forecast_horizon]
                testY = np.array(testY[:-forecast_horizon])
                testX = testX[:-forecast_horizon]
                idx = testX.index.values
        else:
            if recursive:
                # Add the 'forecast_horizon' column and move the target columns to the end.
                dataset['forecast_horizon'] = 0
                p = dataset[target_features]
                dataset = dataset.drop(target_features, axis=1)
                for f in target_features:
                    dataset[f] = p[f]
                dataset = dataset.dropna()

                # Build one window of timesteps+1 consecutive rows per sample; the target
                # columns of the last row (the step to be predicted) are masked with -1.
                x = []
                for i in range(timesteps+1, len(dataset)+1):
                    sys.stdout.write("System %i/%i: %5i/%i                \r" % (s+1, num_sys, i, len(dataset)))
                    sys.stdout.flush()
                    d = dataset.iloc[i-timesteps-1:i].copy()
                    d.iloc[-1, -len(target_features):] = -1
                    x.append(d.values)
                x = np.array(x)
                # Targets aligned with the last row of each window.
                y = dataset[target_features].iloc[timesteps:]
                split = dataset[:('2015-10-11 00:00:00', s)].iloc[timesteps+1:].shape[0]#'2015-10-12 07:00:00'
                trainX, testX = x[:split], x[split:]
                trainY, testY = y.iloc[:split].values, y.iloc[split:].values
                idx = y.iloc[split:].index
            else:
                # Each window spans timesteps past rows plus forecast_horizon future rows;
                # the target columns of the future rows are masked with -1.
                x = []
                y = []
                for i in range(timesteps, len(dataset)-forecast_horizon):
                    sys.stdout.write("System %i/%i: %5i/%i                \r" % (s+1, num_sys, i, len(dataset)))
                    sys.stdout.flush()
                    d = dataset.iloc[i-timesteps:i+forecast_horizon].copy()
                    d.iloc[-forecast_horizon:, -len(target_features):] = -1
                    x.append(d.values)
                    y.append(dataset.iloc[i:i+forecast_horizon][target_features].values)
                x = np.array(x)
                y = np.array(y)
                # Drops the trailing target axis; this only works for a single target column.
                y = y.reshape(y.shape[0], y.shape[1])

                split = dataset[:('2015-10-10 23:00:00', s)].iloc[timesteps:].shape[0]
                # The last forecast_horizon training windows are left out of the training set.
                trainX, testX = x[:split-forecast_horizon], x[split:]
                trainY, testY = y[:split-forecast_horizon], y[split:]
                idx = dataset[('2015-10-10 23:00:00', s):].index[1:-1]
        trainXs.append(trainX)
        trainYs.append(trainY)
        testXs.append(testX)
        testYs.append(testY)
        idxs.append(idx)

    # Stack the per-system arrays along a new axis 1 and flatten the first two axes.
    # The result interleaves the systems: row k belongs to list position k % num_sys.
    # np.stack requires all systems to have produced the same number of samples.
    a = np.stack(trainYs, axis=1)
    trainY = a.reshape(a.shape[0]*a.shape[1], a.shape[2])

    a = np.stack(trainXs, axis=1)
    if method in flat:
        trainX = a.reshape(a.shape[0]*a.shape[1], a.shape[2])
    else:
        trainX = a.reshape(a.shape[0]*a.shape[1], a.shape[2], a.shape[3])

    a = np.stack(testYs, axis=1)
    testY = a.reshape(a.shape[0]*a.shape[1], a.shape[2])

    a = np.stack(testXs, axis=1)
    if method in flat:
        testX = a.reshape(a.shape[0]*a.shape[1], a.shape[2])
    else:
        testX = a.reshape(a.shape[0]*a.shape[1], a.shape[2], a.shape[3])

    # Keep only the tail of the interleaved pvlib series that lines up with the test rows.
    a = np.stack(pvlibs, axis=1)
    if recursive:
        pvlib = a.reshape(a.shape[0]*a.shape[1], 1)[-len(testY):]
    else:
        pvlib = a.reshape(a.shape[0]*a.shape[1], 1)[-len(testY)-forecast_horizon*num_sys:]

    a = np.stack(idxs, axis=1)
    idx = a.reshape(a.shape[0]*a.shape[1])

    # np.savez appends '.npz' to the file name, matching the existence check above.
    np.savez(fname, trainX=trainX, trainY=trainY, testX=testX, testY=testY, pvlib=pvlib, idx=idx)
    print('Saved to ' + fname + '.npz       ')
print('Preprocessing done.')

Build Model
==============================================================================================

In [None]:
# Passed as `val_split` to the model constructors in the next cell.
# NOTE(review): its exact meaning is defined in machineLearningModel, which is not
# visible here -- confirm before changing.
val_split = 1

In [None]:
# Instantiate the forecasting model selected by `method`.
# Strings are compared with `==`: `is` tests object identity, which only works by
# accident of string interning and raises a SyntaxWarning on Python >= 3.8.
if method == 'randfor':
    # Earlier settings tried: (216, 0.63, 'mse'), (10, 0.33, 'mae'); (200, 0.13, 'mse') -> 597
    model = RandomForest(200, 0.13, 'mse', verbose=1)
else:
    if tensorboard:
        print('tensorboard activated')
        callbacks.append(TensorBoard(log_dir='./tensorboard', histogram_freq=1, batch_size=batch_size,
                                     write_graph=True, write_grads=True, write_images=False))

    # The three network wrappers share the same trailing arguments.
    if method == 'mlp':
        model = MultiLayerPerceptron(shape, out_dim, num_layers, num_neurons, loss_fct, optim,
                                     act_fct, out_act, metrics, dropout_rate, dir + 'model.png', batch_size,
                                     epochs, val_split, callbacks, 1, True)
    elif method == 'lstm':
        model = LongShortTermMemory(shape, out_dim, num_layers, num_neurons, loss_fct, optim,
                                    act_fct, out_act, metrics, dropout_rate, dir + 'model.png', batch_size,
                                    epochs, val_split, callbacks, 1, True)
    elif method == 'dilated':
        # best so far: layers 3, kernel size 2, filters 32, 'causal'
        model = DilatedConvolution(shape, out_dim, 3, num_neurons, 2, 32, 'causal', loss_fct, optim,
                                   act_fct, out_act, metrics, dropout_rate, dir + 'model.png', batch_size,
                                   epochs, val_split, callbacks, 1, True)

Training
==============================================================================================

In [None]:
# Pretrain the model on the full training set and persist the result.
if pretraining:
    X = trainX
    y = trainY

    if shuffle:
        # Apply one random permutation to samples and targets so they stay aligned.
        rand_idx = np.arange(X.shape[0])
        np.random.shuffle(rand_idx)
        X = X[rand_idx]
        y = y[rand_idx]

    print('Start pretraining ...')
    # val_idx is set to a tenth of the sample count.
    model.learn(X, y, val_idx=int(len(y) / 10.0))

    # `!=` instead of `is not`: identity comparison against a string literal is
    # implementation-dependent and raises a SyntaxWarning on Python >= 3.8.
    if method != 'randfor':
        name = './saved_models/pretrained_t-'+str(timesteps)+'_f'+str(shape[0])+'_e'+str(epochs)+'_b'+str(batch_size)#+'_sys'+str(system)
        # serialize model to JSON
        model_json = model.model.to_json()
        with open(name + ".json", "w") as json_file:
            json_file.write(model_json)
        # serialize weights to HDF5
        model.model.save_weights(name + ".h5")
        print("Saved model to disk")
    else:
        # The random forest has no Keras model to serialize; keep a reference instead.
        pretrained = model

In [None]:
# Restore the best pretraining state before walk-forward validation.
# `!=` instead of `is not`: string identity is implementation-dependent.
if method != 'randfor':
    # Weights written by the ModelCheckpoint callback from the configuration cell.
    model.model.load_weights('./saved_models/best_model.h5')
else:
    # `pretrained` is only set by the pretraining cell when pretraining is enabled.
    model = pretrained

Walk-Forward Validation
==============================================================================================

In [None]:
# Experiment notes / open questions:
# dilated: 3 2 32 ep1 120w t-5
# mlp: 5 50 not bad, t-5
# lstm: 
# randfor: 
# fixed vs recursive?
# batch size = window?
# maybe set the masked value to -1000 instead of -1?
# fixed horizon maybe also with -1s?
# only learn every 5th step (once all systems have been supplied with new data)
# filter the predictions at the very end: set negative values up to +5 to 0

In [None]:
# Re-configure the pretrained model for the walk-forward phase.
model.epochs = 1          # tried: 50, 200, 5
model.batch_size = 1024   # tried: 128
model.verbose = 0
model.callbacks = [val_history]
model.shuffle = True      # tried: False

# Walk-forward settings; these replace the values from the configuration cell.
wfvtraining = False
sliding_window = 120      # tried: 60, 1680, 840, 168, 1024, 8760, 24, 169
threshold = -2000         # tried: 1, 0, 10, 200, 100, 150, 500, 1000, 2000
val_div = 2.0             # tried: 10

# Accumulators filled by the validation loop.
predictions, pred_err = [], []
trainset, trainy = [], []

# Number of start positions that still leave a full horizon inside the test set.
length = len(testX) - forecast_horizon * num_sys  # tried fixed sizes: 810, 10100, 10000, 3000, 2600
perr = -1
# Walk-forward validation: for every start row i, forecast the horizon, record the
# mean absolute error and, if `wfvtraining` is set, retrain on a sliding window of
# recent samples. Test rows are interleaved by system, so consecutive time steps of
# one system are num_sys rows apart.
for i in range(length):
    if i == 1:#sliding_window + 1:
        # From the second step on, retraining runs with early stopping.
        model.callbacks.append(EarlyStopping(patience=7, restore_best_weights=True))#5 , baseline=600
    # `!=` instead of `is not`: string identity is implementation-dependent.
    if method != 'randfor' and i > sliding_window + 1 and wfvtraining:
        # `h` is the return value of the most recent model.learn() call below.
        loss = h.history['loss'][-1]
        val_loss = min(h.history['val_loss'])
        model.epochs = 50
    else:
        loss = -1
        val_loss = -1

    sys.stdout.write("Walk-Forward Validation %5i/%i: %5d %5d, pred: %6d\r" % (i+1, length, loss, val_loss, perr))
    sys.stdout.flush()

    if not recursive:
        # Fixed-horizon models predict the whole horizon in one call. This branch was
        # duplicated verbatim for flat and sequence inputs and is shared here.
        ts = np.array([testX[i]])
        ty = np.array([testY[i]])
        # make prediction for input new vector
        p = pd.DataFrame(model.forecast(ts))
        predictions.append(p)
        perr = np.mean(np.abs((ty - p).values))
        pred_err.append(perr)

        trainset.append(ts)
        trainy.append(ty)
        if i - sliding_window >= 0:
            trainset = trainset[-sliding_window:]
            trainy = trainy[-sliding_window:]

        if wfvtraining and perr > threshold:
            w = i - forecast_horizon
            if w > 0:
                dfX = np.concatenate(trainset)
                dfX = dfX[:w+1]
                dfY = np.concatenate(trainy)
                dfY = dfY[:w+1]
                # train with newly available data
                h = model.learn(dfX, dfY, val_idx=max(int(len(dfY) / val_div), 1))
    elif method in flat:
        # initialize values for lagged power columns
        # NOTE(review): these reads use offsets l = 1..timesteps while the writes below
        # use l = 0..timesteps-1 -- confirm against the lag-column layout.
        p = []
        for l in range(1, timesteps + 1):
            p.append(testX[i,-(l*len(dyn_features + target_features))-2])

        ps = []
        ts = []
        ty = []
        for f in range(forecast_horizon):
            # build input vector for future timestep
            t = testX[i+(f*num_sys)].reshape(1, -1)
            if t.size > 0:
                # Write the buffered power values into the lagged power columns and
                # advance the buffer by one step.
                for l in range(timesteps-1):
                    t[0][-(l*len(dyn_features + target_features))-2] = p[l]
                    p[l] = p[l+1]
                t[0][-(timesteps-1)*len(dyn_features + target_features)-2] = p[-1]
                # The last column carries the current step within the horizon.
                t[0][-1] = f
                ts.append(t)
                ty.append(testY[i+(f*num_sys)])

                # make prediction for input new vector
                p[-1] = model.forecast(t).item(0)
                ps.append(p[-1])

        perr = np.mean(np.abs(np.concatenate(ty).flatten() - ps))
        pred_err.append(perr)
        predictions.append(pd.DataFrame(ps))

        if wfvtraining:
            trainset += ts
            trainy += ty
            if i - sliding_window >= 0:
                trainset = trainset[-(sliding_window*forecast_horizon):]
                trainy = trainy[-(sliding_window*forecast_horizon):]
            if perr > threshold:# and i % num_sys == 0:
                # Select buffered samples by their position and stored horizon step.
                # NOTE(review): presumably this keeps only samples whose target would
                # already be observed at this point -- confirm.
                cond = [j//forecast_horizon - trainset[-j][0][-1] - ((j+forecast_horizon-1)//forecast_horizon-1) >= 0 for j in range(len(trainset), 0, -1)]
                dfX = np.array(trainset)[cond][:,0]
                dfY = np.array(trainy)[cond][:,0]
                # train with newly available data
                h = model.learn(dfX, dfY, val_idx=max(int(len(dfY) / val_div), 1))
    else:
        # initialize values for lagged power columns
        # (last column of the first `timesteps` rows of the window)
        p = []
        for l in range(timesteps):
            p.append(testX[i,:][l][-1])
        ps = []
        ts = []
        ty = []
        for f in range(forecast_horizon):
            # build input vector for future timestep
            t = np.array([testX[i+(f*num_sys)]])
            if t.size > 0:
                # Write the buffered power values into the lag rows and advance the
                # buffer by one step; row -2 receives the newest value.
                for l in range(timesteps-1):
                    t[0][l,-1] = p[l]
                    p[l] = p[l+1]
                t[0][-2,-1] = p[-1]
                # The second-to-last column carries the current step within the horizon.
                t[0][:,-2] = f
                ts.append(t)
                ty.append(testY[i+(f*num_sys)])

                # make prediction for input new vector
                p[-1] = model.forecast(t).item(0)
                ps.append(p[-1])

        predictions.append(pd.DataFrame(ps))
        perr = np.mean(np.abs(np.concatenate(ty).flatten() - ps))
        pred_err.append(perr)

        if wfvtraining:
            trainset += ts
            trainy += ty
            if i - sliding_window >= 0:
                trainset = trainset[-(sliding_window*forecast_horizon):]
                trainy = trainy[-(sliding_window*forecast_horizon):]
            # Retrain only once per full round over all systems.
            if  perr > threshold and i % num_sys == 0:
                # Select buffered samples by their position and stored horizon step.
                # NOTE(review): presumably this keeps only samples whose target would
                # already be observed at this point -- confirm.
                cond = [j//forecast_horizon - trainset[-j][0][0,-2] - ((j+forecast_horizon-1)//forecast_horizon-1) >= 0 for j in range(len(trainset), 0, -1)]
                dfX = np.array(trainset)[cond][:,0]
                dfY = np.array(trainy)[cond][:,0]
                # train with newly available data
                h = model.learn(dfX, dfY, val_idx=max(int(len(dfY) / val_div), 1))
    
# Collect all per-step forecasts into one frame.
prediction = pd.concat(predictions)

# Persist the network after walk-forward validation.
# `!=` instead of `is not`: string identity is implementation-dependent.
if method != 'randfor':
    name = './saved_models/trained_t-'+str(timesteps)+'_f'+str(shape[0])+'_e'+str(epochs)+'_b'+str(batch_size)#+'_sys'+str(system)
    # serialize model to JSON
    model_json = model.model.to_json()
    with open(name + ".json", "w") as json_file:
        json_file.write(model_json)
    # serialize weights to HDF5
    model.model.save_weights(name + ".h5")
    print("\nSaved model to disk")

In [None]:
# Absolute prediction error of every walk-forward step.
if recursive:
    abs_errors = np.abs([err.tolist() for err in pred_err])
else:
    abs_errors = np.abs(list(pred_err))
plt.plot(abs_errors)
plt.show()

# Column 0 of the concatenated forecasts, restricted to rows with index label 0.
prediction = pd.concat(predictions)
first_step = prediction[0][0].values
plt.plot(first_step)
plt.show()

Evaluation
==============================================================================================

In [None]:
# Number of test rows to evaluate; same expression as in the walk-forward cell.
# In recursive mode the evaluation loop below reassigns `length` to len(data).
#length = 2000#160#2000#300#400#500
length = len(testX) - forecast_horizon * num_sys


In [None]:
# Per-system evaluation: align the forecasts of every horizon step with the
# measurements and the pvlib reference, write summary CSVs and optionally plot.
plots = True#True#False#True
comp = 'pvlib'
data_list = []
for system in range(num_sys):
    directory = dir + 'system' + str(system + 1) + '/'
    set_dir(directory)
    # Test rows are interleaved by system, so a stride of num_sys selects one system.
    meas = testY[system::num_sys]
    index = idx[system::num_sys]
    pred = pd.concat(predictions[system::num_sys])
    pvl = pvlib[system::num_sys]
    data = pd.DataFrame()
    for i in range(forecast_horizon):
        if recursive:
            # Pad the +(i+1)h forecasts by i rows at the front, then cap at `length`.
            data['+'+str(i+1)+'h-prediction'] = np.pad(pred[0][i].values, (i, 0), mode='constant', constant_values=(np.nan,))[:length]
            # `length` is deliberately reassigned: later columns and systems are
            # capped to the row count of the frame built so far.
            length = len(data)
        else:
            data['+'+str(i+1)+'h-prediction'] = np.pad(pred[i].values, (i, forecast_horizon-i-1), mode='constant', constant_values=(np.nan,))

    if recursive:
        data['measured'] = pd.DataFrame(np.array(meas).reshape([len(meas), len(target_features)])).iloc[:,0]
    else:
        # First horizon column for every row, extended by the last horizon column of the
        # final rows. pd.concat replaces Series.append, which was removed in pandas 2.0.
        meas_frame = pd.DataFrame(np.array(meas))
        data['measured'] = pd.concat(
            [meas_frame.iloc[:,0], meas_frame.iloc[-forecast_horizon+1:,-1]],
            ignore_index=True).iloc[:length]
    data = data.set_index(pd.MultiIndex.from_tuples(index[:length])).unstack()
    data['pvlib'] = pvl[:length]
    tmp = pd.DataFrame()
    tmp[comp] = data[comp]
    # Preprocessing iterates the systems in reverse order, so list position `system`
    # corresponds to SystemID num_sys-1-system (previously hard-coded as 4-system).
    tmp['measured'] = data[('measured', num_sys-1-system)]
    for i in range(forecast_horizon):
        tmp['+'+str(i+1)+'h-prediction'] = data['+'+str(i+1)+'h-prediction']
    data = tmp
    data.index = pd.to_datetime(data.index)
    data = data.dropna()

    m_col = data['measured']
    l_col = data[comp].dropna()

    data.describe().to_csv(directory + 'description.csv', encoding='utf-8')
    data.corr(method='pearson').to_csv(directory + 'pearson.csv', encoding='utf-8')
    data.corr(method='spearman').to_csv(directory + 'spearman.csv', encoding='utf-8')
    data.corr(method='kendall').to_csv(directory + 'kendall.csv', encoding='utf-8')
    data.to_csv(directory + 'predictions.csv', encoding='utf-8')

    if plots:
        print('System: ' + str(system) + '#########################')
        for horizon in range(1, forecast_horizon + 1):
            name = '+' + str(horizon) + 'h-prediction'
            p_col = data[name]

            walkForwardDailyLoss(m_col, p_col, l_col, comp, name)
            scatter_predictions(m_col, p_col, name)

            print('%s test RMSE: %.3f' % (name, math.sqrt(mean_squared_error(m_col, p_col))))
            print('%s test RMSE: %.3f' % (comp + ' forecast', math.sqrt(mean_squared_error(m_col, l_col))))
            draw_boxplot(m_col, p_col, l_col, comp, name, title='Absolute power prediction error', outliers=False)
            draw_boxplot_monthly(m_col, p_col, l_col, comp, name, 'Monthly power prediction error', 'w', False)

            # Single reference day used for the "nice day" metrics and plots.
            m1, m2 = '2016-07-17 00:00:00', '2016-07-17 23:00:00'
            print('%s nice day RMSE: %.3f' % (name, math.sqrt(mean_squared_error(m_col[m1:m2], p_col[m1:m2]))))
            print('%s nice day RMSE: %.3f' % (comp + ' forecast', math.sqrt(mean_squared_error(m_col[m1:m2], l_col[m1:m2]))))
            draw_boxplot(m_col, p_col, l_col, comp, name, m1, m2, title='Absolute power prediction error', outliers=False)

            plot_timeseries(m_col, p_col, l_col, comp, name, end='2015-10-18 00:00:00')
            plot_timeseries(m_col, p_col, l_col, comp, name, start='2017-02-02 10:00:00', end='2017-02-09 10:00:00')
            plot_timeseries(m_col, p_col, l_col, comp, name, start='2017-12-24 00:00:00')
            plot_timeseries(m_col, p_col, l_col, comp, name, start=m1, end=m2)
            plot_timeseries(m_col, p_col, l_col, comp, name)
            plot_timeseries(m_col, p_col, None, comp, name)

            daily_energy_error(m_col, p_col, l_col, comp, name, start='2015-10-13 00:00:00')

            draw_histogram(p_col, m_col, name)
            print()
        plot_error_by_hour_of_day(data, comp, 0, forecast_horizon)#1

    # Keep the aligned frame for the follow-up cells.
    data_list.append(data)

In [None]:
# For every system, score each complete forecast window (one value per horizon
# step) against the measurements and against the pvlib reference.
for data in data_list:
    n_windows = len(data)-(forecast_horizon-1)
    l1 = []  # per-window MSE of the model forecast
    l2 = []  # per-window MSE of the pvlib reference
    for i in range(n_windows):
        sys.stdout.write("%i/%i\r" % (i+1, n_windows))
        sys.stdout.flush()
        # The +f h forecast belonging to window start i sits f-1 rows further down.
        forecast = [data['+'+str(f)+'h-prediction'].iloc[f-1 + i]
                    for f in range(1, forecast_horizon+1)]
        window = slice(i, forecast_horizon+i)
        a = pd.DataFrame()
        a['forecast'] = forecast
        a['pvlib'] = data['pvlib'].iloc[window].values
        a['measured'] = data['measured'].iloc[window].values
        l1.append(mean_squared_error(a.measured, a.forecast))
        l2.append(mean_squared_error(a.measured, a.pvlib))
    print()
    # RMSE over all windows: model first, pvlib second.
    for window_mse in (l1, l2):
        print(math.sqrt(pd.DataFrame(window_mse).mean().values[0]))

    plt.plot(np.sqrt(l2))
    plt.plot(np.sqrt(l1))
    plt.show()

In [None]:
# First row of the inspected range; forecast_horizon consecutive windows are plotted.
d = 11650  # tried: 85, 1800, 1850, 110, 0, 3, 436, 3272
for data in data_list:
    print('##############################################')
    for i in range(d, d+forecast_horizon):
        # The +f h forecast belonging to window start i sits f-1 rows further down.
        forecast = [data['+'+str(f)+'h-prediction'].iloc[f-1 + i]
                    for f in range(1, forecast_horizon+1)]
        window = slice(i, forecast_horizon+i)
        a = pd.DataFrame()
        a['forecast'] = forecast
        a['pvlib'] = data['pvlib'].iloc[window].values
        a['measured'] = data['measured'].iloc[window].values
        a.plot()
        plt.show()

In [None]:
# Plot and print the loss history recorded during walk-forward validation.
set_dir(dir)
# `!=` instead of `is not`: string identity is implementation-dependent.
if method != 'randfor':
    #draw_history(history)
    draw_history(val_history, True)
    print(val_history.history)

### Persistence

In [None]:
# Persistence baselines built from the measured test targets.
#   persistence1: the +k h forecast is the value measured k steps earlier.
#   persistence2: every horizon uses the value measured 24 steps earlier.
if recursive:
    measured = pd.DataFrame(np.array(testY).reshape([len(testY), len(target_features)])).iloc[:,0]

    persistence1 = pd.DataFrame()
    persistence2 = pd.DataFrame()
    persistence1['measured'] = measured
    persistence2['measured'] = measured

    # testY interleaves the systems row by row (row k belongs to list position
    # k % num_sys), so one time step of a system is num_sys rows. Shifting by the
    # plain step count would copy values from a different system.
    for f in range(forecast_horizon):
        column = '+'+str(f+1)+'h-persistence'
        persistence1[column] = measured.shift((f+1) * num_sys)
        persistence2[column] = measured.shift(24 * num_sys)

    # Index by (time, SystemID) and spread the systems over the columns.
    persistence1 = persistence1.set_index(pd.MultiIndex.from_tuples(idx)).unstack()
    persistence2 = persistence2.set_index(pd.MultiIndex.from_tuples(idx)).unstack()

In [None]:
# Report correlation and RMSE figures for both persistence baselines.
if recursive:
    # Single reference day used for the "nice day" RMSE.
    m1, m2 = '2016-07-17 00:00:00', '2016-07-17 23:00:00'

    print('persistence1:')
    print(persistence1.corr(method='pearson'))
    persistence1 = persistence1.dropna()
    m_col = persistence1['measured']
    print()
    for f in range(1, forecast_horizon + 1):
        name = '+'+str(f)+'h-persistence'
        p_col = persistence1[name]
        # Split both series into len/24 roughly equal chunks and average the chunk RMSEs.
        n_chunks = int(len(m_col) / 24)
        chunk_pairs = zip(np.array_split(m_col, n_chunks), np.array_split(p_col, n_chunks))
        chunk_rmse = [math.sqrt(mean_squared_error(m_part, p_part)) for m_part, p_part in chunk_pairs]
        m_err = pd.DataFrame(chunk_rmse).mean()[0]
        total_rmse = math.sqrt(mean_squared_error(m_col, p_col))
        day_rmse = math.sqrt(mean_squared_error(m_col[m1:m2], p_col[m1:m2]))
        print('%s1 test RMSE: %.3f' % (name, total_rmse))
        print('daily mean %s1 RMSE: %.3f' % (name, m_err))
        print('%s1 nice day RMSE: %.3f' % (name, day_rmse))

    print('\n\n\npersistence2:')
    print(persistence2.corr(method='pearson'))
    persistence2 = persistence2.dropna()
    m_col = persistence2['measured']
    print()
    for f in range(1, forecast_horizon + 1):
        name = '+'+str(f)+'h-persistence'
        p_col = persistence2[name]
        # Same chunked RMSE as above, for the second baseline.
        n_chunks = int(len(m_col) / 24)
        chunk_pairs = zip(np.array_split(p_col, n_chunks), np.array_split(m_col, n_chunks))
        chunk_rmse = [math.sqrt(mean_squared_error(p_part, m_part)) for p_part, m_part in chunk_pairs]
        m_err = pd.DataFrame(chunk_rmse).mean()[0]
        total_rmse = math.sqrt(mean_squared_error(m_col, p_col))
        day_rmse = math.sqrt(mean_squared_error(m_col[m1:m2], p_col[m1:m2]))
        print('%s2 test RMSE: %.3f' % (name, total_rmse))
        print('daily mean %s2 RMSE: %.3f' % (name, m_err))
        print('%s2 nice day RMSE: %.3f' % (name, day_rmse))