In [1]:
# Imports
import numpy as np
import pandas as pd
import os
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from keras.models import Sequential
from keras.layers import LSTM, Dense, Flatten
import cufflinks as cf
from plotly.offline import init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
cf.go_offline()
from keras.layers import MaxPooling1D, Conv1D, Dropout
##

In [2]:
# Get raw data
def get_raw_data(path):
    df = pd.read_excel(path, index_col=0)
    # print(df.head(3))
    return df

raw_data = get_raw_data('../data/raw_data/cascaded_use_case_data.xlsx')

In [3]:
target = raw_data[['Q_Kalltveit']]
target.index = pd.to_datetime(target.index)

In [4]:
scaler = MinMaxScaler()
target_data = target.values
scaler.fit(target_data)
scaled_data = scaler.transform(target_data)

In [5]:
def prepare_data(timeseries_data, n):
    X, y = [], []
    for i in range(len(timeseries_data)):
        end_idx = i + n
        if end_idx > len(timeseries_data) - 1:
            break
        seq_x, seq_y = timeseries_data[i:end_idx], timeseries_data[end_idx]
        X.append(seq_x)
        y.append(seq_y)
    return np.array(X), np.array(y)
n_features = 6
X, y = prepare_data(scaled_data, n_features)

In [6]:
# 70 / 20 / 10
X_train, X_test_val, y_train, y_test_val = train_test_split(X, y, test_size=int(len(target)*0.3), shuffle=False)
X_val, X_test, y_val, y_test = train_test_split(X_test_val, y_test_val, test_size=int(len(X_test_val)*0.3), shuffle=False)
X_train.shape, X_val.shape, X_test.shape

((44296, 6, 1), (13291, 6, 1), (5695, 6, 1))

In [7]:
lstm = Sequential()
lstm.add(LSTM(64, activation='relu', return_sequences=True, input_shape=(n_features, 1)))
lstm.add(LSTM(64, activation='relu'))
lstm.add(Dense(1))
lstm.compile(optimizer='adam', loss='mse')
lstm.fit(X_train, y_train, epochs=20, verbose=0, batch_size=64, validation_data=(X_val, y_val))
lstm.summary()

In [None]:
cnn = Sequential()
cnn.add(Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(n_features, 1)))
cnn.add(MaxPooling1D(pool_size=2))
cnn.add(Flatten())
cnn.add(Dense(50, activation='relu'))
cnn.add(Dense(1))
cnn.compile(optimizer='adam', loss='mse')
cnn.fit(X_train, y_train, epochs=20, verbose=0, batch_size=64, validation_data=(X_val, y_val))
cnn.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_1 (Conv1D)           (None, 5, 64)             192       
                                                                 
 max_pooling1d_1 (MaxPooling  (None, 2, 64)            0         
 1D)                                                             
                                                                 
 flatten_1 (Flatten)         (None, 128)               0         
                                                                 
 dense_6 (Dense)             (None, 50)                6450      
                                                                 
 dense_7 (Dense)             (None, 1)                 51        
                                                                 
Total params: 6,693
Trainable params: 6,693
Non-trainable params: 0
____________________________________________________

In [None]:
mlp = Sequential()
mlp.add(Dense(64, activation='relu', input_dim=n_features))
mlp.add(Dense(1))
mlp.compile(optimizer='adam', loss='mse')
mlp.fit(X_train, y_train, epochs=20, verbose=0, batch_size=64, validation_data=(X_val, y_val))
mlp.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_8 (Dense)             (None, 64)                448       
                                                                 
 dense_9 (Dense)             (None, 1)                 65        
                                                                 
Total params: 513
Trainable params: 513
Non-trainable params: 0
_________________________________________________________________


In [None]:
y_val = scaler.inverse_transform(y_val)

In [None]:
lstm_predictions = lstm.predict(X_val).flatten()
lstm_predictions = scaler.inverse_transform(lstm_predictions.reshape(-1,1))
lstm_val_mae = mean_absolute_error(y_val, lstm_predictions)



In [None]:
cnn_predictions = cnn.predict(X_val).flatten()
cnn_predictions = scaler.inverse_transform(cnn_predictions.reshape(-1,1))
cnn_val_mae = mean_absolute_error(y_val, cnn_predictions)



In [None]:
mlp_predictions = mlp.predict(X_val).flatten()
mlp_predictions = scaler.inverse_transform(mlp_predictions.reshape(-1,1))
mlp_val_mae = mean_absolute_error(y_val, mlp_predictions)



In [None]:
idx_val = target.index[len(X_train):len(X_train)+len(X_val)]

In [None]:
predictions = pd.DataFrame(data={
    'LSTM predictions': [y[0] for y in lstm_predictions],
    'Actuals': [y[0] for y in y_val]}, index=idx_val)
predictions.iplot(kind='spread')


The pandas.np module is deprecated and will be removed from pandas in a future version. Import numpy directly instead


The pandas.np module is deprecated and will be removed from pandas in a future version. Import numpy directly instead



In [None]:
mean_value = np.mean(target.values)
val_comparisons = pd.DataFrame(data={
    'cnn': [cnn_val_mae, mean_squared_error(y_val, cnn_predictions, squared=False), mean_squared_error(y_val, cnn_predictions, squared=False)/ mean_value],
    'lstm': [lstm_val_mae, mean_squared_error(y_val, lstm_predictions, squared=False), mean_squared_error(y_val, lstm_predictions, squared=False)/ mean_value],
    'mlp': [mlp_val_mae, mean_squared_error(y_val, mlp_predictions, squared=False), mean_squared_error(y_val, mlp_predictions, squared=False)/ mean_value],
}, index=['MAE', 'RMSE', 'NRMSE'])
val_comparisons

Unnamed: 0,cnn,lstm,mlp
MAE,0.4189,0.253801,0.191626
RMSE,0.870368,0.340108,0.368143
NRMSE,0.133713,0.05225,0.056557


In [None]:
y_test = scaler.inverse_transform(y_test)
lstm_test_predictions = lstm.predict(X_test)
lstm_test_predictions = scaler.inverse_transform(lstm_test_predictions.reshape(-1,1))
lstm_test_mae = mean_absolute_error(y_test, lstm_test_predictions)
print("Mean Absolute Error lstm on test data:", lstm_test_mae)
print("RMSE on test data:", mean_squared_error(y_test, lstm_test_predictions, squared=False))
print("NRSME on test data", mean_squared_error(y_test, lstm_test_predictions, squared=False)/ mean_value)

Mean Absolute Error lstm on test data: 0.26222750468459566
RMSE on test data: 0.3793878486988123
NRSME on test data 0.05828446649907043


In [None]:
test_predictions = pd.DataFrame(data={
    'LSTM predictions': [y[0] for y in lstm_test_predictions],
    'Actuals': [y[0] for y in y_test]}, index=target.index[len(target_data)-len(y_test):])
test_predictions.iplot(kind='spread')


The pandas.np module is deprecated and will be removed from pandas in a future version. Import numpy directly instead


The pandas.np module is deprecated and will be removed from pandas in a future version. Import numpy directly instead



In [None]:
y_train = scaler.inverse_transform(y_train)
lstm_train_predictions = lstm.predict(X_train)
lstm_train_predictions = scaler.inverse_transform(lstm_train_predictions.reshape(-1,1))
lstm_test_mae = mean_absolute_error(y_train, lstm_train_predictions)
print("Mean Absolute Error lstm on train data:", lstm_test_mae)
print("RMSE on train data:", mean_squared_error(y_train, lstm_train_predictions, squared=False))
print("NRSME on train data", mean_squared_error(y_train, lstm_train_predictions, squared=False)/ mean_value)

Mean Absolute Error lstm on train data: 0.25995272666052255
RMSE on train data: 0.3991659058061137
NRSME on train data 0.06132292308338332


In [None]:
val = [x[0] for x in X_val]
history = [x for x in val]
p = list()
for i in range(len(val)):
	yhat = history[-1]
	p.append(yhat)
	obs = val[i]
	history.append(obs)
print("Mean Absolute Error:", mean_absolute_error(y_val, scaler.inverse_transform(p)))
print("RMSE:", mean_squared_error(y_val, scaler.inverse_transform(p), squared=False))

Mean Absolute Error: 1.1709861605597773
RMSE: 3.101814197264232
