In [None]:
import pandas as pd
import numpy as np
from numpy import concatenate
from math import sqrt
import os.path
import os

import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import pyplot

import pickle
from datetime import datetime

from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

import tensorflow as tf
from keras.models import Sequential, Model
from keras.utils.vis_utils import plot_model
from keras.layers import Dense, LSTM, Flatten, Dropout, Bidirectional, GRU, concatenate, Embedding, Dropout, Input, SpatialDropout1D, Dropout
from keras.layers.convolutional import Conv1D, MaxPooling1D, Conv2D

# Optimizer identifier handed to Keras `compile()` inside define_model_lstm.
optimizer='adam'

def plot(tdf):
    """Draw each column of ``tdf`` in its own vertically stacked subplot.

    Args:
        tdf: DataFrame-like object exposing ``shape``, ``values`` and
            ``columns``; one subplot row is created per column and titled
            with the column label.
    """
    pyplot.figure()
    n_panels = tdf.shape[1]
    data = tdf.values
    for col_idx in range(n_panels):
        # Subplot positions are 1-based while column indices are 0-based.
        pyplot.subplot(n_panels, 1, col_idx + 1)
        pyplot.plot(data[:, col_idx])
        pyplot.title(tdf.columns[col_idx], y=0.5, loc='right')
    pyplot.show()

def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Reframe a time series as a table of lagged and lead columns.

    Args:
        data: Observations in time order. Either a flat list / 1-D array
            (one variable) or a 2-D array, DataFrame or list of rows
            (one column per variable).
        n_in: Number of lag steps; produces the columns (t-n_in) ... (t-1).
        n_out: Number of forward steps; produces (t) ... (t+n_out-1).
        dropnan: When true, rows containing any NaN are dropped, which
            removes the incomplete rows created by shifting.

    Returns:
        A DataFrame with columns named ``var<j>(t-<i>)``, ``var<j>(t)`` and
        ``var<j>(t+<i>)``, where ``j`` is the 1-based variable number.
    """
    df = pd.DataFrame(data)
    # Count variables from the frame itself rather than from the input type:
    # a list of rows has several columns, and a 1-D array has no shape[1].
    n_vars = df.shape[1]
    cols, names = [], []
    # Input sequence: (t-n_in) ... (t-1).
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j + 1, i)) for j in range(n_vars)]
    # Forecast sequence: (t) ... (t+n_out-1).
    for i in range(n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j + 1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j + 1, i)) for j in range(n_vars)]
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    if dropnan:
        agg.dropna(inplace=True)
    return agg

def _print_metrics(header, y_true, y_pred):
    """Print MAE, MSE, RMSE and R2 of ``y_pred`` against ``y_true`` under ``header``."""
    mse = mean_squared_error(y_true, y_pred)
    print(header)
    print('Test Mean Absolute Error:', mean_absolute_error(y_true, y_pred))
    print('Test Mean Squared Error:', mse)
    print('Test Root Mean Squared Error:', np.sqrt(mse))
    print('Test R2:', r2_score(y_true, y_pred))


def show_result(fmodel, ftest_X, ftest_y, scaler):
    """Predict on the test inputs, print error metrics and return the predictions.

    Metrics are printed twice: once on the scaled values and once after
    mapping predictions and targets back to the original units of the last
    feature seen by ``scaler``.

    Args:
        fmodel: Object exposing ``predict(ftest_X)``.
        ftest_X: Test inputs forwarded to ``fmodel.predict``.
        ftest_y: Scaled test targets.
        scaler: Fitted scaler exposing ``feature_range``, ``data_min_`` and
            ``data_max_`` (assumed to be a scikit-learn ``MinMaxScaler`` whose
            last feature is the target -- confirm against the caller).

    Returns:
        Tuple ``(yhat, yhat_real, ftest_y_real)``: scaled predictions,
        predictions in original units, targets in original units.
    """
    yhat = fmodel.predict(ftest_X)

    # Invert the min-max scaling of the target column. The scaler's own
    # feature_range is used so the result is also right for (-1, 1) scaling,
    # not only for the (0, 1) case.
    range_lo, range_hi = scaler.feature_range
    target_min = scaler.data_min_[-1]
    target_span = scaler.data_max_[-1] - target_min
    yhat_real = target_span * (yhat - range_lo) / (range_hi - range_lo) + target_min
    ftest_y_real = target_span * (ftest_y - range_lo) / (range_hi - range_lo) + target_min

    _print_metrics('normalized:', ftest_y, yhat)
    print()
    _print_metrics('real:', ftest_y_real, yhat_real)

    return yhat, yhat_real, ftest_y_real

def save_res(predic, ftest_y, r_predic, r_ftest_y):
    """Collect the rounded evaluation metrics of one run.

    Args:
        predic: Predictions on the scaled data.
        ftest_y: Targets on the scaled data.
        r_predic: Predictions in original units.
        r_ftest_y: Targets in original units.

    Returns:
        An 8-tuple rounded to 3 decimals: (MAE, MSE, RMSE, R2) for the scaled
        pair followed by (MAE, MSE, RMSE, R2) for the original-unit pair.
    """
    def _scores(y_true, y_pred):
        mse = mean_squared_error(y_true, y_pred)
        raw = (
            mean_absolute_error(y_true, y_pred),
            mse,
            np.sqrt(mse),
            r2_score(y_true, y_pred),
        )
        return tuple(np.round(score, 3) for score in raw)

    return _scores(ftest_y, predic) + _scores(r_ftest_y, r_predic)

def save_res2(fadd, cri_all, typ):
    """Append the mean metrics of a set of runs as a new row of an Excel sheet.

    Args:
        fadd: Path of the Excel workbook. It is read if it exists; otherwise
            an empty results table is started and written to this path.
        cri_all: Non-empty sequence of 8-element metric tuples, ordered as
            (MAE, MSE, RMSE, R2) on scaled data followed by the same four on
            original-unit data.
        typ: Label stored in the ``model`` column of the new row.

    Returns:
        The updated results DataFrame (also written back to ``fadd``).

    Raises:
        ValueError: If ``cri_all`` is empty.
    """
    metric_columns = ('MAE-n', 'MSE-n', 'RMSE-n', 'R2-n', 'MAE', 'MSE', 'RMSE', 'R2')

    if len(cri_all) == 0:
        raise ValueError('cri_all must contain at least one set of metrics')
    # Column-wise mean over the repeated runs.
    means = np.array(cri_all).mean(axis=0)

    if os.path.isfile(fadd):
        res = pd.read_excel(fadd)
    else:
        # First run: start a fresh table instead of failing on the missing file.
        res = pd.DataFrame(columns=('model',) + metric_columns)

    idx = len(res)
    res.at[idx, 'model'] = typ
    for pos, column in enumerate(metric_columns):
        res.at[idx, column] = means[pos]

    res.to_excel(fadd, index=False)
    return res
    
def final_plot(ftest_y, fyhat, yhat_real, df, _name, typ, n):
    """Plot actual vs. predicted values over time, save the figure and show it.

    The figure is written to ``plots/<typ>/predict<n>.jpg``; the directory is
    created (including ``plots/``) when it does not exist yet.

    Args:
        ftest_y: Array of actual target values; flattened into the ``real`` curve.
        fyhat: Unused; kept so existing callers keep working.
        yhat_real: Array of predicted values; flattened into the ``pred`` curve.
        df: Frame whose last ``ftest_y.shape[0]`` index entries label the x axis.
        _name: Text used as the y-axis label.
        typ: Sub-directory of ``plots/`` that receives the image.
        n: Value appended to the file name (``predict<n>.jpg``).
    """
    plt.figure(figsize=(8, 4))

    tplot = pd.DataFrame()
    tplot['real'] = ftest_y.flatten()
    tplot['pred'] = yhat_real.flatten()
    # Label the samples with the trailing part of the source index.
    tplot['date'] = df[-ftest_y.shape[0]:].index
    tplot = tplot.set_index('date')

    tplot['real'].plot(label="actual", color='cyan')
    tplot['pred'].plot(label="prediction", color='salmon')

    plt.tight_layout()
    sns.despine(top=True)
    plt.subplots_adjust(left=0.07)
    plt.ylabel(_name, size=15)
    plt.xlabel('Time step', size=15)
    plt.legend(fontsize=15)

    out_dir = 'plots/' + typ
    # makedirs also creates the parent 'plots' directory and tolerates an
    # already existing target, unlike the isdir()+mkdir() pair.
    os.makedirs(out_dir, exist_ok=True)
    plt.savefig(out_dir + '/predict' + str(n) + '.jpg', dpi=300)
    plt.show()

def plot_neural(history, typ, n, kind):
    """Plot one training curve and its validation counterpart, save and show it.

    The figure is written to ``plots/<typ>/<kind><n>.jpg``; the directory is
    created (including ``plots/``) when it does not exist yet.

    Args:
        history: Object whose ``history`` mapping holds the series ``kind``
            and ``'val_' + kind``.
        typ: Sub-directory of ``plots/`` that receives the image.
        n: Value appended to the file name.
        kind: Key of the series to draw, e.g. ``'loss'``.
    """
    pyplot.plot(history.history[kind], label='train')
    pyplot.plot(history.history['val_' + kind], label='test')
    pyplot.legend()

    out_dir = 'plots/' + typ
    # makedirs also creates the parent 'plots' directory and tolerates an
    # already existing target, unlike the isdir()+mkdir() pair.
    os.makedirs(out_dir, exist_ok=True)
    pyplot.savefig(out_dir + '/' + kind + str(n) + '.jpg', dpi=300)
    pyplot.show()
    
def run_deep_model(fmodel, ftrain_X, ftrain_y, ftest_X, ftest_y, epochs, batch_size,
                   early_stopping=False, patience=10):
    """Fit ``fmodel`` on the training data, validating on the test data.

    Args:
        fmodel: Model exposing a Keras-style ``fit`` method.
        ftrain_X: Training inputs.
        ftrain_y: Training targets.
        ftest_X: Inputs passed as validation data.
        ftest_y: Targets passed as validation data.
        epochs: Number of epochs forwarded to ``fit``.
        batch_size: Batch size forwarded to ``fit``.
        early_stopping: When true, attach an ``EarlyStopping`` callback that
            monitors ``val_loss``. Off by default, which matches the previous
            behaviour where the callback was built but never used.
        patience: Patience of the early-stopping callback (only used when
            ``early_stopping`` is true).

    Returns:
        Tuple ``(history, fmodel)``: the value returned by ``fit`` and the
        same model object that was passed in.
    """
    fit_kwargs = {
        'epochs': epochs,
        'batch_size': batch_size,
        'validation_data': (ftest_X, ftest_y),
        'verbose': 2,
        # Samples are fed in their given order (no shuffling between epochs).
        'shuffle': False,
    }
    if early_stopping:
        # NOTE(review): validation data is the test set, so stopping on
        # val_loss tunes the epoch count on the data used for the final score.
        fit_kwargs['callbacks'] = [
            tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=patience)
        ]
    history = fmodel.fit(ftrain_X, ftrain_y, **fit_kwargs)
    return history, fmodel
    
def define_model_lstm(i1, i2):
    """Build and compile the stacked-LSTM regression model.

    The network is four LSTM layers (60, 50, 30 and 20 units), each followed
    by dropout, then a 7-unit ReLU dense layer with dropout and a single
    linear output unit. It is compiled with mean-squared-error loss, the
    module-level ``optimizer`` and mean-absolute-error as a metric. A diagram
    of the model is written to ``plots/lstm.png``.

    Args:
        i1: First dimension of the per-sample input shape (time steps).
        i2: Second dimension of the per-sample input shape (features).

    Returns:
        The compiled ``Sequential`` model.
    """
    modl = Sequential()
    # All but the last LSTM return full sequences so the next LSTM layer
    # receives 3-D input.
    modl.add(LSTM(60, input_shape=(i1, i2), return_sequences=True))
    modl.add(Dropout(0.2))
    modl.add(LSTM(50, return_sequences=True))
    modl.add(Dropout(0.1))
    modl.add(LSTM(30, return_sequences=True))
    modl.add(Dropout(0.1))
    modl.add(LSTM(20))
    modl.add(Dropout(0.05))
    modl.add(Dense(7, activation='relu'))
    modl.add(Dropout(0.01))
    modl.add(Dense(1))

    modl.compile(loss=['mean_squared_error'], optimizer=optimizer, metrics=['mean_absolute_error'])

    # Make sure the output directory exists before the diagram is written;
    # this function runs before any plotting helper creates it.
    os.makedirs('plots', exist_ok=True)
    plot_model(modl, to_file='plots/lstm.png', show_shapes=True, show_layer_names=True, dpi=300)
    return modl
    
def train_test_lstm(values, n_train_hours):
    """Split a supervised table into train/test inputs and targets.

    Args:
        values: 2-D array; the last column is the target, all other columns
            are input features.
        n_train_hours: Number of leading rows assigned to the training set;
            the remaining rows form the test set.

    Returns:
        Tuple ``(train_X, train_y, test_X, test_y)`` where the X arrays have
        shape ``(samples, 1, features)`` and the y arrays are 1-D.
    """
    def _to_xy(rows):
        features = rows[:, :-1]
        # Insert a time-step axis of length 1: (samples, 1, features).
        features = features.reshape((features.shape[0], 1, features.shape[1]))
        return features, rows[:, -1]

    train_X, train_y = _to_xy(values[:n_train_hours, :])
    test_X, test_y = _to_xy(values[n_train_hours:, :])
    return train_X, train_y, test_X, test_y
    
def preprocess_deep(fvalues, nn, tdf, prange, train_size):
    """Scale the data and reframe it into a lagged supervised table.

    Args:
        fvalues: 2-D array of raw values, one column per variable.
        nn: Number of lag steps used as model inputs.
        tdf: Frame whose column count (``tdf.shape[1]``) gives the number of
            variables in ``fvalues``.
        prange: Truthy to scale into (0, 1), falsy to scale into (-1, 1).
        train_size: Fraction of the reframed rows assigned to training.

    Returns:
        Tuple ``(values, n_train_hours, scaler)``: the reframed array (all
        lagged columns plus the last variable at time t), the number of
        training rows, and the fitted ``MinMaxScaler``.
    """
    feature_range = (0, 1) if prange else (-1, 1)
    scaler = MinMaxScaler(feature_range=feature_range)
    # NOTE(review): the scaler is fitted on the whole series, before the
    # train/test split is made.
    scaled = scaler.fit_transform(fvalues.astype('float32'))

    supervised = series_to_supervised(scaled[:], nn, 1)

    # Time-t columns start right after the nn lag blocks. Drop every one of
    # them except the last, which is kept as the prediction target.
    n_features = tdf.shape[1]
    first_current = nn * n_features
    current_inputs = supervised.columns[np.arange(first_current, first_current + n_features - 1)]
    supervised = supervised.drop(current_inputs, axis=1)

    n_train_hours = int(supervised.shape[0] * train_size)
    return supervised.values, n_train_hours, scaler
    
def inverse_data(x, nMax, nMin, feature_range=(0, 1)):
    """Map min-max scaled values back to their original range.

    Args:
        x: Scaled value or array.
        nMax: Maximum of the original data.
        nMin: Minimum of the original data.
        feature_range: ``(low, high)`` range the data was scaled into.
            Defaults to ``(0, 1)``, which reproduces the original formula
            ``(nMax - nMin) * x + nMin``.

    Returns:
        ``x`` expressed in the original units.
    """
    low, high = feature_range
    return (nMax - nMin) * (x - low) / (high - low) + nMin

In [None]:
# --- Experiment configuration ---------------------------------------------

# Data preparation
nn = 7              # number of lag steps given to preprocess_deep
p_range = True      # True -> scale to (0, 1); False -> scale to (-1, 1)
train_size = 0.8    # fraction of rows used for training

# Optional moving-average smoothing of the selected columns
mv_avg = False
rolling_n = 50      # rolling window length used when mv_avg is enabled

# Training
epochs = 100
batch_size = 2048
repeat = 1          # number of train/evaluate repetitions per experiment

# Output
res_add = 'res.xlsx'  # workbook that collects the averaged metrics

# Generation

In [None]:
# Column to predict in this experiment.
target = 'PV'
# Experiment label: names the plots sub-directory and the results row.
typ = 'lstm+gen'

In [None]:
# Read the PV production and weather series, indexed by their timestamp columns.
PV_data = pd.read_csv("miris_pv.csv", parse_dates=['DateTime'], index_col= ['DateTime'])
weather_data = pd.read_csv('weather_data.csv', parse_dates=['Time'], index_col= ['Time'])

# Force both indexes to datetime in case read_csv left them unparsed.
PV_data.index = pd.to_datetime(PV_data.index)
weather_data.index = pd.to_datetime(weather_data.index)

PV_data = PV_data.reset_index()
PV_data.set_index('DateTime', inplace=True)
# Average the PV readings into 15-minute bins. '15min' is used instead of
# the '15T' alias, which is deprecated in recent pandas releases.
PV_df = PV_data.resample('15min').mean()

# Restrict both series to the same time window.
weather_filtered = weather_data.loc['2019-05-14T00:00:00+02:00':'2019-06-18T00:00:00+02:00']
PV_filtered = PV_df.loc['2019-05-14T00:00:00+02:00':'2019-06-18T00:00:00+02:00']

# Join the two sources column-wise on their timestamps.
merged_df = pd.concat([PV_filtered, weather_filtered], axis=1)
merged_df.index.name = 'datetime'
merged_df.reset_index(inplace=True)
merged_df['datetime'] = pd.to_datetime(merged_df['datetime'])
merged_df = merged_df.set_index('datetime')
# Conform the index to a regular 15-minute grid.
merged_df = merged_df.asfreq('15min')
merged_df = merged_df.drop(['SNOW'], axis = 1)
# SWDtop arrives as text: turn narrow no-break spaces (U+202F) into plain
# spaces, strip them, then convert to float.
# NOTE(review): presumably thousands separators -- confirm against the CSV.
merged_df = merged_df.replace('\u202f', ' ', regex=True)
merged_df['SWDtop'] = merged_df['SWDtop'].str.replace(' ', '').astype(float)

In [None]:
# Pairwise correlation matrix of the merged frame.
cor = merged_df.corr()
# Correlation of every column with the target, in descending order.
# Left as a bare expression so the notebook shows it as the cell output.
cor[target].sort_values(ascending=False)

In [None]:
# Column names in descending order of correlation with the target (cell output).
cor[target].sort_values(ascending=False).index

In [None]:
# Columns used for the experiment. The target must be the last entry:
# preprocess_deep and train_test_lstm treat the final column as y.
c = ['RH2m','ST','SWDtop', 'SWD', 'TT2M', 'PV']
# c = ['SWDtop', 'SWD', 'ST', 'TT2M', 'WS10m', 'PREC', 'CU', 'CM',
#        'WS100m', 'CD', 'RH2m', 'PV']

# Optional smoothing: replace each selected column by its rolling mean and
# drop the leading rows, which cover the NaN-filled warm-up of the window.
if(mv_avg):
    for cc in c:
        merged_df[cc] = merged_df[cc].rolling(rolling_n).mean()
    merged_df = merged_df[rolling_n:]
    
df = merged_df[c]

values = df.values

plot(df)

# Scale, build the lagged supervised table and split it into train/test sets.
values, n_train_hours, scaler = preprocess_deep(values, nn, df, p_range, train_size)
train_X, train_y, test_X, test_y = train_test_lstm(values, n_train_hours)

# One tuple of metrics per repetition.
cri = []
for i in range(repeat):    
    # A fresh model is built and trained for every repetition.
    model = define_model_lstm(train_X.shape[1], train_X.shape[2])
    his, model = run_deep_model(model, train_X, train_y, test_X, test_y, epochs, batch_size)

    plot_neural(his, typ, i, 'loss')
    plot_neural(his, typ, i, 'mean_absolute_error')

    pred, r_pred, real_y = show_result(model, test_X, test_y, scaler)
    cri.append(save_res(pred, test_y, r_pred, real_y))
    final_plot(real_y, pred, r_pred, df, target, typ, i)
    
# Append the metrics averaged over all repetitions to the results workbook.
save_res2(res_add, cri, typ)

# Load

In [None]:
# Column to predict in this experiment.
target = 'Conso'
# Experiment label: names the plots sub-directory and the results row.
typ = 'lstm+load'

In [None]:
# Read the load (consumption) and weather series, indexed by their timestamp columns.
load_data = pd.read_csv("miris_load.csv", parse_dates=['DateTime'], index_col= ['DateTime'])
weather_data = pd.read_csv('weather_data.csv', parse_dates=['Time'], index_col= ['Time'])

# Force both indexes to datetime in case read_csv left them unparsed.
load_data.index = pd.to_datetime(load_data.index)
weather_data.index = pd.to_datetime(weather_data.index)

load_data = load_data.reset_index()
load_data.set_index('DateTime', inplace=True)
# Average the load readings into 15-minute bins. '15min' is used instead of
# the '15T' alias, which is deprecated in recent pandas releases.
load_df = load_data.resample('15min').mean()

# Restrict both series to the same time window.
weather_filtered = weather_data.loc['2019-05-14T00:00:00+02:00':'2019-06-18T00:00:00+02:00']
load_filtered = load_df.loc['2019-05-14T00:00:00+02:00':'2019-06-18T00:00:00+02:00']

# Join the two sources column-wise on their timestamps.
merged_df = pd.concat([load_filtered, weather_filtered], axis=1)
merged_df.index.name = 'datetime'
merged_df.reset_index(inplace=True)
merged_df['datetime'] = pd.to_datetime(merged_df['datetime'])
merged_df = merged_df.set_index('datetime')
# Conform the index to a regular 15-minute grid.
merged_df = merged_df.asfreq('15min')
merged_df = merged_df.drop(['SNOW'], axis = 1)
# SWDtop arrives as text: turn narrow no-break spaces (U+202F) into plain
# spaces, strip them, then convert to float.
# NOTE(review): presumably thousands separators -- confirm against the CSV.
merged_df = merged_df.replace('\u202f', ' ', regex=True)

merged_df['SWDtop'] = merged_df['SWDtop'].str.replace(' ', '').astype(float)

In [None]:
# Pairwise correlation matrix of the merged frame.
cor = merged_df.corr()
# Correlation of every column with the target, in descending order.
# Left as a bare expression so the notebook shows it as the cell output.
cor[target].sort_values(ascending=False)

In [None]:
# Columns used for the experiment. The target must be the last entry:
# preprocess_deep and train_test_lstm treat the final column as y.
# c = ['SWDtop', 'SWD', 'RH2m', 'Conso']
c = ['SWDtop', 'SWD', 'ST', 'TT2M', 'WS10m', 'CU', 'PREC', 'CM',
       'CD', 'WS100m', 'RH2m', 'Conso']

# Optional smoothing: replace each selected column by its rolling mean and
# drop the leading rows, which cover the NaN-filled warm-up of the window.
if(mv_avg):
    for cc in c:
        merged_df[cc] = merged_df[cc].rolling(rolling_n).mean()
    merged_df = merged_df[rolling_n:]

df = merged_df[c]

values = df.values

plot(df)

# Scale, build the lagged supervised table and split it into train/test sets.
values, n_train_hours, scaler = preprocess_deep(values, nn, df, p_range, train_size)
train_X, train_y, test_X, test_y = train_test_lstm(values, n_train_hours)

# One tuple of metrics per repetition.
cri = []
for i in range(repeat):    
    # A fresh model is built and trained for every repetition.
    model = define_model_lstm(train_X.shape[1], train_X.shape[2])
    his, model = run_deep_model(model, train_X, train_y, test_X, test_y, epochs, batch_size)

    plot_neural(his, typ, i, 'loss')
    plot_neural(his, typ, i, 'mean_absolute_error')

    pred, r_pred, real_y = show_result(model, test_X, test_y, scaler)
    cri.append(save_res(pred, test_y, r_pred, real_y))
    final_plot(real_y, pred, r_pred, df, target, typ, i)
    
# Append the metrics averaged over all repetitions to the results workbook.
save_res2(res_add, cri, typ)