In [1]:
import pandas as pd
from pathlib import Path
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt 

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

In [2]:
# from https://machinelearningmastery.com/time-series-prediction-lstm-recurrent-neural-networks-python-keras/

In [3]:
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

In [4]:
RUG = pd.read_pickle("../rug_timeseries.pkl")
RUG = RUG[::10]
dfs = [RUG.filter([i]) for i in RUG]

In [None]:
RUG

# Timeseries LSTM

In [6]:
def func(df, name):

    df2 = df.copy()

    scaler = MinMaxScaler(feature_range=(0, 1))
    dataset = scaler.fit_transform(df2)

    train_size = int(len(dataset) * 0.67)
    test_size = len(dataset) - train_size
    train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]


    # convert an array of values into a dataset matrix
    def create_dataset(dataset, look_back=3):
        dataX, dataY = [], []
        for i in range(len(dataset)-look_back-1):
            a = dataset[i:(i+look_back), 0]
            dataX.append(a)
            dataY.append(dataset[i + look_back, 0])
        return np.array(dataX), np.array(dataY)
    
    # reshape into X=t and Y=t+1
    look_back = 3
    trainX, trainY = create_dataset(train, look_back)
    testX, testY = create_dataset(test, look_back)
    
    # reshape input to be [samples, time steps, features]
    trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
    testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))

    callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)


    # create and fit the LSTM network
    model = Sequential()
    model.add(LSTM(4, input_shape=(1, look_back)))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mse'])
    model.fit(trainX, trainY, epochs=100, verbose=2, callbacks=[callback])

    # make predictions
    trainPredict = model.predict(trainX)
    testPredict = model.predict(testX)
    # invert predictions
    trainPredict = scaler.inverse_transform(trainPredict)
    trainY = scaler.inverse_transform([trainY])
    testPredict = scaler.inverse_transform(testPredict)
    testY = scaler.inverse_transform([testY])
    # calculate root mean squared error
    trainScore = np.sqrt(mean_squared_error(trainY[0], trainPredict[:,0]))
    # print('Train Score: %.2f RMSE' % (trainScore))
    testScore = np.sqrt(mean_squared_error(testY[0], testPredict[:,0]))
    # print('Test Score: %.2f RMSE' % (testScore))

    return (name, (trainScore, testScore))

In [7]:
results = []

for name, df in zip([i for i in RUG], dfs):
    print(name)
    
    if df.isnull().values.any():
        print("NaN")
        print("-------------")
        continue

    r = func(df, name)
    print("-------------")
    results.append(r)

Ede\Pompstation Edese bos\Straat 03\Flow
Epoch 1/100
2201/2201 - 23s - loss: 0.0407 - mse: 0.0407 - 23s/epoch - 10ms/step
Epoch 2/100
2201/2201 - 16s - loss: 0.0046 - mse: 0.0046 - 16s/epoch - 7ms/step
Epoch 3/100
2201/2201 - 14s - loss: 0.0033 - mse: 0.0033 - 14s/epoch - 6ms/step
Epoch 4/100
2201/2201 - 13s - loss: 0.0032 - mse: 0.0032 - 13s/epoch - 6ms/step
Epoch 5/100
2201/2201 - 16s - loss: 0.0032 - mse: 0.0032 - 16s/epoch - 7ms/step
Epoch 6/100
2201/2201 - 14s - loss: 0.0032 - mse: 0.0032 - 14s/epoch - 6ms/step
Epoch 7/100
2201/2201 - 13s - loss: 0.0032 - mse: 0.0032 - 13s/epoch - 6ms/step
Epoch 8/100
2201/2201 - 15s - loss: 0.0032 - mse: 0.0032 - 15s/epoch - 7ms/step
Epoch 9/100
2201/2201 - 13s - loss: 0.0032 - mse: 0.0032 - 13s/epoch - 6ms/step
Epoch 10/100
2201/2201 - 9s - loss: 0.0032 - mse: 0.0032 - 9s/epoch - 4ms/step
Epoch 11/100
2201/2201 - 12s - loss: 0.0032 - mse: 0.0032 - 12s/epoch - 5ms/step
Epoch 12/100
2201/2201 - 14s - loss: 0.0032 - mse: 0.0032 - 14s/epoch - 6ms/st

In [8]:
for i in results:
    print(i)

('Ede\\Pompstation Edese bos\\Straat 03\\Flow', (24.760483278067362, 27.808596784094977))
('Oosterbeek-hoog\\Fletcher BV|Supplied', (0.1876138000380782, 0.19236711692151226))
('Renkum-Heelsum\\Parenco BV|Supplied', (0.38981021970616037, 0.3649765842151672))
('Renkum-Heelsum\\Zorggroep|Supplied', (0.1773401115267138, 0.18208379102614908))
('Pompstation Oosterbeek\\Straat 01\\Head', (0.32630830354702617, 0.7097312161050567))
('Pompstation Oosterbeek\\Straat 02\\Head', (0.36691698156184865, 0.2528286507241947))
('Reservoir Doorwerth\\Straat 01\\Flow', (7.4518741696859845, 5.313094872535263))
('Reservoir Doorwerth\\Straat 02\\Head', (0.12096356941639297, 0.22840064054007977))
('Reservoir Doorwerth\\Straat 03\\Flow', (14.213931393132043, 12.333341113422058))
('Pompstation Wageningseberg\\Head', (0.36173248284399206, 0.49926372312230355))
('Pompstation La Cabine\\Head', (0.6579024739678265, 0.5871266009311575))


In [31]:
lstm_results = [[i[0], i[1][0], i[1][1]] for i in results]
lstm_results.append(['Renkum-Heelsum\Renkum|Supplied', np.nan, np.nan])
lstm_results.append(['Pompstation Wageningseberg\Flow', np.nan, np.nan])
lstm_results = pd.DataFrame(lstm_results)


lstm_results.columns = ["Location", "Train RMSE", "Test RMSE"]
lstm_results.set_index("Location", inplace=True)
lstm_results.sort_index(inplace=True)

print(lstm_results.round(3).to_latex())

\begin{tabular}{lrr}
\toprule
 & Train RMSE & Test RMSE \\
Location &  &  \\
\midrule
Ede\Pompstation Edese bos\Straat 03\Flow & 24.760000 & 27.809000 \\
Oosterbeek-hoog\Fletcher BV|Supplied & 0.188000 & 0.192000 \\
Pompstation La Cabine\Head & 0.658000 & 0.587000 \\
Pompstation Oosterbeek\Straat 01\Head & 0.326000 & 0.710000 \\
Pompstation Oosterbeek\Straat 02\Head & 0.367000 & 0.253000 \\
Pompstation Wageningseberg\Flow & NaN & NaN \\
Pompstation Wageningseberg\Head & 0.362000 & 0.499000 \\
Renkum-Heelsum\Parenco BV|Supplied & 0.390000 & 0.365000 \\
Renkum-Heelsum\Renkum|Supplied & NaN & NaN \\
Renkum-Heelsum\Zorggroep|Supplied & 0.177000 & 0.182000 \\
Reservoir Doorwerth\Straat 01\Flow & 7.452000 & 5.313000 \\
Reservoir Doorwerth\Straat 02\Head & 0.121000 & 0.228000 \\
Reservoir Doorwerth\Straat 03\Flow & 14.214000 & 12.333000 \\
\bottomrule
\end{tabular}

