In [6]:
import tensorflow as tf
import pandas as pd
import os
import numpy as np
import datetime as dt
from random import seed
from tensorflow.keras import models
from tensorflow.keras import layers
from tensorflow.keras import backend as K
tf.compat.v1.experimental.output_all_intermediates(True)

In [7]:
tf.random.set_seed(36)

## Dataset - Both Sto. Nino and Montalban

In [8]:
df = pd.read_csv("2016_2017_sto_nino_and_montalban.csv")
df

Unnamed: 0.1,Unnamed: 0,datetime,month,day,hour,t,Station,x,Rainfall_Aries,Rainfall_Boso,Rainfall_Campana,Rainfall_Nangka,Rainfall_Oro,Waterlevel,Discharge,Cross_Section,Velocity,fric_coeff,slope
0,0,2016-01-01 00:00:00,1,1,0,0.0,Montalban,0,0,1,2,0,0,21.03,14.842428,630.90,0.023526,0.035,0.002000
1,1,2016-01-01 00:00:00,1,1,0,0.0,Sto_Nino,14420,0,1,2,0,0,12.18,21.033407,803.88,0.026165,0.033,0.000667
2,2,2016-01-01 01:00:00,1,1,1,3600.0,Montalban,0,0,1,1,1,0,21.03,14.842428,630.90,0.023526,0.035,0.002000
3,3,2016-01-01 01:00:00,1,1,1,3600.0,Sto_Nino,14420,0,1,1,1,0,12.19,21.280072,804.54,0.026450,0.033,0.000667
4,4,2016-01-01 02:00:00,1,1,2,7200.0,Montalban,0,1,1,1,0,1,21.03,14.842428,630.90,0.023526,0.035,0.002000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35035,35035,2017-12-31 21:00:00,12,31,21,63147600.0,Sto_Nino,14420,0,0,0,0,0,12.44,28.244204,821.04,0.034401,0.033,0.000667
35036,35036,2017-12-31 22:00:00,12,31,22,63151200.0,Montalban,0,0,0,0,0,0,21.18,17.224575,635.40,0.027108,0.035,0.002000
35037,35037,2017-12-31 22:00:00,12,31,22,63151200.0,Sto_Nino,14420,0,0,0,0,0,12.44,28.244204,821.04,0.034401,0.033,0.000667
35038,35038,2017-12-31 23:00:00,12,31,23,63154800.0,Montalban,0,0,0,0,0,0,21.18,17.224575,635.40,0.027108,0.035,0.002000


In [9]:
df = df[['x', 't', 'fric_coeff', 'slope', 'Rainfall_Aries', 'Rainfall_Boso', 'Rainfall_Campana', 'Rainfall_Nangka', 'Rainfall_Oro', 'Waterlevel', 'Velocity', 'Discharge']]
df

Unnamed: 0,x,t,fric_coeff,slope,Rainfall_Aries,Rainfall_Boso,Rainfall_Campana,Rainfall_Nangka,Rainfall_Oro,Waterlevel,Velocity,Discharge
0,0,0.0,0.035,0.002000,0,1,2,0,0,21.03,0.023526,14.842428
1,14420,0.0,0.033,0.000667,0,1,2,0,0,12.18,0.026165,21.033407
2,0,3600.0,0.035,0.002000,0,1,1,1,0,21.03,0.023526,14.842428
3,14420,3600.0,0.033,0.000667,0,1,1,1,0,12.19,0.026450,21.280072
4,0,7200.0,0.035,0.002000,1,1,1,0,1,21.03,0.023526,14.842428
...,...,...,...,...,...,...,...,...,...,...,...,...
35035,14420,63147600.0,0.033,0.000667,0,0,0,0,0,12.44,0.034401,28.244204
35036,0,63151200.0,0.035,0.002000,0,0,0,0,0,21.18,0.027108,17.224575
35037,14420,63151200.0,0.033,0.000667,0,0,0,0,0,12.44,0.034401,28.244204
35038,0,63154800.0,0.035,0.002000,0,0,0,0,0,21.18,0.027108,17.224575


## Window Generator

In [10]:
sto_nino_df = df[df['x'] == 14420]
sto_nino_df

Unnamed: 0,x,t,fric_coeff,slope,Rainfall_Aries,Rainfall_Boso,Rainfall_Campana,Rainfall_Nangka,Rainfall_Oro,Waterlevel,Velocity,Discharge
1,14420,0.0,0.033,0.000667,0,1,2,0,0,12.18,0.026165,21.033407
3,14420,3600.0,0.033,0.000667,0,1,1,1,0,12.19,0.026450,21.280072
5,14420,7200.0,0.033,0.000667,1,1,1,0,1,12.19,0.026450,21.280072
7,14420,10800.0,0.033,0.000667,0,0,0,1,0,12.20,0.026738,21.529056
9,14420,14400.0,0.033,0.000667,1,1,1,0,0,12.20,0.026738,21.529056
...,...,...,...,...,...,...,...,...,...,...,...,...
35031,14420,63140400.0,0.033,0.000667,0,0,0,0,0,12.44,0.034401,28.244204
35033,14420,63144000.0,0.033,0.000667,0,0,0,0,0,12.44,0.034401,28.244204
35035,14420,63147600.0,0.033,0.000667,0,0,0,0,0,12.44,0.034401,28.244204
35037,14420,63151200.0,0.033,0.000667,0,0,0,0,0,12.44,0.034401,28.244204


In [11]:
n = len(sto_nino_df)

In [12]:
sto_nino_train_df = sto_nino_df[0:int(n*0.5)]
sto_nino_val_df = sto_nino_df[int(n*0.5):int(n*0.75)]
sto_nino_test_df = sto_nino_df[int(n*0.75):]

In [13]:
montalban_df = df[df['x'] == 0]
montalban_df

Unnamed: 0,x,t,fric_coeff,slope,Rainfall_Aries,Rainfall_Boso,Rainfall_Campana,Rainfall_Nangka,Rainfall_Oro,Waterlevel,Velocity,Discharge
0,0,0.0,0.035,0.002,0,1,2,0,0,21.03,0.023526,14.842428
2,0,3600.0,0.035,0.002,0,1,1,1,0,21.03,0.023526,14.842428
4,0,7200.0,0.035,0.002,1,1,1,0,1,21.03,0.023526,14.842428
6,0,10800.0,0.035,0.002,0,0,0,1,0,21.03,0.023526,14.842428
8,0,14400.0,0.035,0.002,1,1,1,0,0,21.03,0.023526,14.842428
...,...,...,...,...,...,...,...,...,...,...,...,...
35030,0,63140400.0,0.035,0.002,0,0,0,0,0,21.18,0.027108,17.224575
35032,0,63144000.0,0.035,0.002,0,0,0,0,0,21.18,0.027108,17.224575
35034,0,63147600.0,0.035,0.002,0,0,0,0,0,21.18,0.027108,17.224575
35036,0,63151200.0,0.035,0.002,0,0,0,0,0,21.18,0.027108,17.224575


In [14]:
montalban_train_df = montalban_df[0:int(n*0.5)]
montalban_val_df = montalban_df[int(n*0.5):int(n*0.75)]
montalban_test_df = montalban_df[int(n*0.75):]

In [33]:
class WindowGenerator():
    def __init__(self, input_width, label_width, shift, train_df, val_df, test_df, label_columns=None):
        # Store the raw data.
        self.train_df = train_df
        self.val_df = val_df
        self.test_df = test_df

        # Work out the label column indices.
        self.label_columns = label_columns
        if label_columns is not None:
            self.label_columns_indices = {name: i for i, name in enumerate(label_columns)}
        self.column_indices = {name: i for i, name in enumerate(train_df.columns)}

        # Work out the window parameters.
        self.input_width = input_width
        self.label_width = label_width
        self.shift = shift

        self.total_window_size = input_width + shift

        self.input_slice = slice(0, input_width)
        self.input_indices = np.arange(self.total_window_size)[self.input_slice]

        self.label_start = self.total_window_size - self.label_width
        self.labels_slice = slice(self.label_start, None)
        self.label_indices = np.arange(self.total_window_size)[self.labels_slice]

    def __repr__(self):
        return '\n'.join([
            f'Total window size: {self.total_window_size}',
            f'Input indices: {self.input_indices}',
            f'Label indices: {self.label_indices}',
            f'Label column name(s): {self.label_columns}'])

    def split_window(self, features):
        inputs = features[:, self.input_slice, :]
        labels = features[:, self.labels_slice, :]
        if self.label_columns is not None:
            labels = tf.stack([labels[:, :, self.column_indices[name]] for name in self.label_columns], axis=-1)

        # Slicing doesn't preserve static shape information, so set the shapes
        # manually. This way the `tf.data.Datasets` are easier to inspect.
        inputs.set_shape([None, self.input_width, None])
        labels.set_shape([None, self.label_width, None])

        return inputs, labels
    
    # Creating tf datasets for more convenient use and integration into model in the future
    def make_dataset(self, data):
        data = np.array(data, dtype=np.float32)
        ds = tf.keras.utils.timeseries_dataset_from_array(
            data=data,
            targets=None,
            sequence_length=self.total_window_size,
            sequence_stride=1,
            shuffle=True,
            batch_size=32,)

        ds = ds.map(self.split_window)

        return ds
    
    # properties to access them as tf datasets
    @property
    def train(self):
        return self.make_dataset(self.train_df)

    @property
    def val(self):
        return self.make_dataset(self.val_df)

    @property
    def test(self):
        return self.make_dataset(self.test_df)

    @property
    def example(self):
        """Get and cache an example batch of `inputs, labels` for plotting."""
        result = getattr(self, '_example', None)
        if result is None:
            # No example batch was found, so get one from the `.train` dataset
            result = next(iter(self.train))
            # And cache it for next time
            self._example = result
        return result

In [34]:
sto_nino_wide_window = WindowGenerator(
        input_width=6, label_width=6, shift=1,
        train_df = sto_nino_train_df,
        val_df = sto_nino_val_df,
        test_df = sto_nino_test_df,
        label_columns=['Waterlevel', 'Velocity']
    )

sto_nino_wide_window 

Total window size: 7
Input indices: [0 1 2 3 4 5]
Label indices: [1 2 3 4 5 6]
Label column name(s): ['Waterlevel', 'Velocity']

In [35]:
montalban_wide_window = WindowGenerator(
        input_width=6, label_width=6, shift=1,
        train_df = montalban_train_df,
        val_df = montalban_val_df,
        test_df = montalban_test_df,
        label_columns=['Waterlevel', 'Velocity']
    )

montalban_window 

Total window size: 7
Input indices: [0 1 2 3 4 5]
Label indices: [1 2 3 4 5 6]
Label column name(s): ['Waterlevel', 'Velocity']

In [38]:
CONV_WIDTH = 6
sto_nino_conv_window = WindowGenerator(
    input_width=CONV_WIDTH,
    label_width=1,
    shift=1,
    train_df=sto_nino_train_df,
    val_df=sto_nino_val_df,
    test_df=sto_nino_test_df, 
    label_columns = ['Waterlevel', 'Velocity']
    )
sto_nino_conv_window 

Total window size: 7
Input indices: [0 1 2 3 4 5]
Label indices: [6]
Label column name(s): ['Waterlevel', 'Velocity']

In [39]:
CONV_WIDTH = 6
montalban_conv_window = WindowGenerator(
    input_width=CONV_WIDTH,
    label_width=1,
    shift=1,
    train_df=montalban_train_df,
    val_df=montalban_val_df,
    test_df=montalban_test_df, 
    label_columns = ['Waterlevel', 'Velocity']
    )


sto_nino_conv_window 

Total window size: 7
Input indices: [0 1 2 3 4 5]
Label indices: [6]
Label column name(s): ['Waterlevel', 'Velocity']

In [40]:
train_wide_ds = sto_nino_wide_window.train.concatenate(montalban_wide_window.train)
train_wide_ds = train_wide_ds.shuffle(buffer_size=len(list(train_wide_ds)))

val_wide_ds = sto_nino_wide_window.val.concatenate(montalban_wide_window.val)
val_wide_ds = val_wide_ds.shuffle(buffer_size=len(list(val_wide_ds)))

test_wide_ds = sto_nino_wide_window.test.concatenate(montalban_wide_window.test)
test_wide_ds = test_wide_ds.shuffle(buffer_size=len(list(test_wide_ds)))

In [57]:
train_conv_ds = sto_nino_conv_window.train.concatenate(montalban_conv_window.train)
train_conv_ds = train_conv_ds.shuffle(buffer_size=len(list(train_conv_ds)))

val_conv_ds = sto_nino_conv_window.val.concatenate(montalban_conv_window.val)
val_conv_ds = val_conv_ds.shuffle(buffer_size=len(list(val_conv_ds)))

test_conv_ds = sto_nino_conv_window.test.concatenate(montalban_conv_window.test)
test_conv_ds = test_conv_ds.shuffle(buffer_size=len(list(test_conv_ds)))

In [41]:
train_wide_ds

<ShuffleDataset element_spec=(TensorSpec(shape=(None, 6, 12), dtype=tf.float32, name=None), TensorSpec(shape=(None, 6, 2), dtype=tf.float32, name=None))>

## Evaluation Metrics

In [43]:
def r_square(y_true, y_pred):
    x = y_true
    y = y_pred
    mx = K.mean(x, axis=0)
    my = K.mean(y, axis=0)
    xm, ym = x - mx, y - my
    r_num = K.square(K.sum(xm * ym))
    x_square_sum = K.sum(xm * xm)
    y_square_sum = K.sum(ym * ym)
    r_den = (x_square_sum * y_square_sum) + K.epsilon()
    
    r = r_num / r_den
    return r

In [44]:
def NSE(y_true, y_pred):
    '''
    This is the Nash-Sutcliffe Efficiency Coefficient
    '''
    y_pred = K.flatten(y_pred)
    y_true = K.flatten(y_true)

    SS_res =  K.sum(K.square(y_true - y_pred)) 
    SS_tot = K.sum(K.square(y_true - K.mean(y_true))) 
    
    return ( 1 - SS_res/(SS_tot + K.epsilon()) )

In [70]:
# Dense Neural Network
dense_model = tf.keras.Sequential([
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=2)
])
CONV_WIDTH = 6
# Convolution Neural Network
conv_model = tf.keras.Sequential([
    tf.keras.layers.Conv1D(filters=64, kernel_size=(CONV_WIDTH,), activation='relu'),
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=2),
])

# LSTM

lstm_model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(64, return_sequences=True),
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=2)
])

## Training

In [71]:
# For easy compiling and fitting of different models
MAX_EPOCHS = 20

def compile_and_fit(model, train_data=train_wide_ds, val_data=val_wide_ds, patience=2):
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=patience, mode='min')

    model.compile(
        loss=tf.keras.losses.MeanSquaredError(), 
        optimizer='adam', 
        metrics=[tf.keras.metrics.MeanSquaredError(), NSE, r_square]
    )

    history = model.fit(
        train_data, 
        epochs=MAX_EPOCHS,
        validation_data=val_data,
        callbacks=[early_stopping]
    )

    return history

In [72]:
lstm_history = compile_and_fit(lstm_model)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20


In [73]:
dense_history = compile_and_fit(dense_model)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20


In [68]:
# For easy compiling and fitting of different models
MAX_EPOCHS = 20

def compile_and_fit_conv_window(model, train_data=train_conv_ds, val_data=val_conv_ds, patience=2):
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=patience, mode='min')

    model.compile(
        loss=tf.keras.losses.MeanSquaredError(), 
        optimizer='adam', 
        metrics=[tf.keras.metrics.MeanSquaredError(), NSE, r_square]
    )

    history = model.fit(
        train_data, 
        epochs=MAX_EPOCHS,
        validation_data=val_data,
        callbacks=[early_stopping]
    )

    return history

In [74]:
conv_history = compile_and_fit_conv_window(conv_model)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20


In [None]:
# check test predictions: hopefully not constant
# summarize pipeline
# try other models (DNN)

# Predictions

In [75]:
lstm_predictions = lstm_model.predict(test_wide_ds)




In [76]:
lstm_predictions

array([[[14.159484  ,  0.04457789],
        [15.553517  ,  0.05439339],
        [15.767181  ,  0.05589732],
        [15.7966175 ,  0.05610488],
        [15.800611  ,  0.05613246],
        [15.801152  ,  0.05613683]],

       [[14.159484  ,  0.04457789],
        [15.553517  ,  0.05439339],
        [15.767181  ,  0.05589732],
        [15.7966175 ,  0.05610488],
        [15.800611  ,  0.05613246],
        [15.801152  ,  0.05613683]],

       [[14.159484  ,  0.04457789],
        [15.553517  ,  0.05439339],
        [15.767181  ,  0.05589732],
        [15.7966175 ,  0.05610488],
        [15.800611  ,  0.05613246],
        [15.801152  ,  0.05613683]],

       ...,

       [[14.159484  ,  0.04457789],
        [15.553517  ,  0.05439339],
        [15.767181  ,  0.05589732],
        [15.7966175 ,  0.05610488],
        [15.800611  ,  0.05613246],
        [15.801152  ,  0.05613683]],

       [[14.159484  ,  0.04457789],
        [15.553517  ,  0.05439339],
        [15.767181  ,  0.05589732],
       

In [83]:
lstm_predictions_water_level = lstm_predictions[:,:,0].flatten()

In [84]:
lstm_predictions_water_level

array([14.159484 , 15.553517 , 15.767181 , ..., 15.7966175, 15.800611 ,
       15.801152 ], dtype=float32)

In [86]:
np.unique(lstm_predictions_water_level)

array([14.159484 , 15.553517 , 15.767181 , 15.7966175, 15.800611 ,
       15.801152 ], dtype=float32)

In [87]:
import numpy as np
import tensorflow as tf

def make_predictions(model, dataset):
    """
    Make predictions using the trained LSTM model
    
    Args:
        model: Trained LSTM model
        dataset: WindowGenerator dataset to predict on
        
    Returns:
        predictions: Array of predictions
        actual: Array of actual values
    """
    predictions = []
    actual = []
    
    # Iterate through the dataset
    for x, y in dataset:
        # Get predictions for this batch
        pred = model.predict(x, verbose=0)
        predictions.append(pred)
        actual.append(y.numpy())
    
    # Concatenate all batches
    predictions = np.concatenate(predictions, axis=0)
    actual = np.concatenate(actual, axis=0)
    
    return predictions, actual

# Example usage for test set
test_predictions, test_actual = make_predictions(lstm_model, test_wide_ds)


In [88]:
test_actual

array([[[12.57      ,  0.03920864],
        [12.87      ,  0.05229918],
        [12.87      ,  0.05229918],
        [13.16      ,  0.06795534],
        [13.22      ,  0.07160477],
        [13.32      ,  0.07802289]],

       [[12.42      ,  0.03370352],
        [12.48      ,  0.03582812],
        [12.5       ,  0.03655897],
        [12.5       ,  0.03655897],
        [12.49      ,  0.03619212],
        [12.47      ,  0.03546698]],

       [[12.66      ,  0.04283196],
        [12.66      ,  0.04283196],
        [12.66      ,  0.04283196],
        [12.66      ,  0.04283196],
        [12.66      ,  0.04283196],
        [12.66      ,  0.04283196]],

       ...,

       [[21.03      ,  0.0235258 ],
        [21.03      ,  0.0235258 ],
        [21.03      ,  0.0235258 ],
        [21.03      ,  0.0235258 ],
        [21.03      ,  0.0235258 ],
        [21.02      ,  0.02330423]],

       [[21.29      ,  0.03007032],
        [21.29      ,  0.03007032],
        [21.29      ,  0.03007032],
       

In [96]:
test_predictions

array([[[14.159484  ,  0.04457789],
        [15.553517  ,  0.05439339],
        [15.767181  ,  0.05589732],
        [15.7966175 ,  0.05610488],
        [15.800611  ,  0.05613246],
        [15.801152  ,  0.05613683]],

       [[14.159484  ,  0.04457789],
        [15.553517  ,  0.05439339],
        [15.767181  ,  0.05589732],
        [15.7966175 ,  0.05610488],
        [15.800611  ,  0.05613246],
        [15.801152  ,  0.05613683]],

       [[14.159484  ,  0.04457789],
        [15.553517  ,  0.05439339],
        [15.767181  ,  0.05589732],
        [15.7966175 ,  0.05610488],
        [15.800611  ,  0.05613246],
        [15.801152  ,  0.05613683]],

       ...,

       [[14.159484  ,  0.04457789],
        [15.553517  ,  0.05439339],
        [15.767181  ,  0.05589732],
        [15.7966175 ,  0.05610488],
        [15.800611  ,  0.05613246],
        [15.801152  ,  0.05613683]],

       [[14.159484  ,  0.04457789],
        [15.553517  ,  0.05439339],
        [15.767181  ,  0.05589732],
       

In [None]:
import numpy as np
import tensorflow as tf

def make_predictions(model, dataset):

    predictions = []
    actual = []
    
    # Iterate through the dataset
    for x, y in dataset:
        # Get predictions for this batch
        pred = model.predict(x, verbose=0)
        predictions.append(pred)
        actual.append(y.numpy())
    
    # Concatenate all batches
    predictions = np.concatenate(predictions, axis=0)
    actual = np.concatenate(actual, axis=0)
    
    return predictions, actual

# Example usage for test set
test_predictions, test_actual = make_predictions(lstm_model, test_wide_ds)