In [1]:
import datetime as dt
import numpy as np
import pandas as pd
import tensorflow as tf

from model.tf.hydro import ProductionStorage
from model.tf.ml import ConvNet, LSTM
from data.tf.camels_dataset import CamelsDataset, HybridDataset

2024-08-30 07:59:43.980356: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
window_size = 10
camels_dir = '../data/camels/aus'

In [3]:
camels_ds = CamelsDataset(data_dir=camels_dir, 
                          station_list=['422321B'],
                          # state_outlet='WA', map_zone=50
                          target_vars=['flow_cdf'],
                          window_size=window_size)
train_ds, test_ds = camels_ds.get_datasets(batch_size=256)

In [10]:
ts_input_dim = 4
hidden_dim = 32
lstm_dim = 64
n_layers = 2
ts_output_dim = 32
dropout = 0.1

static_input_dim = 7
static_hidden_dim = 32
static_output_dim = 32

combined_hidden_dim = 32

In [11]:
def get_model():
    ts_model = LSTM(input_dim=ts_input_dim,
                hidden_dim=hidden_dim,
                lstm_dim=lstm_dim,
                n_layers=n_layers,
                output_dim=ts_output_dim,
                dropout=dropout)

    static_model = tf.keras.Sequential([
                        tf.keras.layers.Dense(static_hidden_dim, activation='relu'),
                        tf.keras.layers.Dense(static_hidden_dim, activation='relu'),
                        tf.keras.layers.Dense(static_output_dim, activation='linear')
                    ])

    timeseries = tf.keras.Input(shape=(window_size, ts_input_dim), name='timeseries')
    static = tf.keras.Input(shape=(static_input_dim,), name='static')

    concatenated = tf.keras.layers.Concatenate()([ts_model(timeseries), static_model(static)])
    hidden = tf.keras.layers.Dense(combined_hidden_dim, activation='relu')(concatenated)
    hidden = tf.keras.layers.Dense(combined_hidden_dim, activation='relu')(concatenated)
    output = tf.keras.layers.Dense(1)(hidden)

    model_combined = tf.keras.Model(inputs=[timeseries, static], outputs=output)
    return model_combined

In [15]:

epochs = 50

loss_fn = tf.keras.losses.MeanSquaredError()
optimizer = tf.keras.optimizers.Adam(learning_rate=8e-4, beta_1=0.89, beta_2=0.999, weight_decay=2e-3)
callbacks = [tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)]

model = get_model()


for epoch in range(epochs):
    print("\nStart of epoch %d" % (epoch,))

    epoch_loss = 0.0

    # Iterate over the batches of the dataset.
    for step, batch in enumerate(train_ds):
        
        with tf.GradientTape() as tape:
            
            # Run the forward pass of the layer.
            out = model([batch['timeseries'], batch['static']], training=True) 

            # Compute the loss value for this minibatch.
            loss_value = loss_fn(batch['target'], out)

        # Use the gradient tape to automatically retrieve
        # the gradients of the trainable variables with respect to the loss.
        grads = tape.gradient(loss_value, model.trainable_weights)

        # Run one step of gradient descent by updating
        # the value of the variables to minimize the loss.
        optimizer.apply_gradients(zip(grads, model.trainable_weights))

        epoch_loss += loss_value
    
    epoch_loss = epoch_loss/len(train_ds)
    
    val_loss = 0.

    for step, batch in enumerate(test_ds):
            
        # Run the forward pass of the layer.
        out = model([batch['timeseries'], batch['static']], training=False) 

        # Compute the loss value for this minibatch.
        loss_value = loss_fn(batch['target'], out)

        val_loss += loss_value

    val_loss = val_loss/len(test_ds)
    
    print(f"Epoch {epoch + 1} loss: {epoch_loss.numpy():.4f}, val_loss: {val_loss.numpy():.4f}")


Start of epoch 0


2024-08-30 08:48:43.878339: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch 1 loss: 0.7514

Start of epoch 1


2024-08-30 08:48:49.509987: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch 2 loss: 0.6412

Start of epoch 2


2024-08-30 08:48:55.129195: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch 3 loss: 0.6297

Start of epoch 3


2024-08-30 08:49:01.129448: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch 4 loss: 0.6184

Start of epoch 4


2024-08-30 08:49:07.848437: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch 5 loss: 0.6179

Start of epoch 5


2024-08-30 08:49:13.485982: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch 6 loss: 0.6104

Start of epoch 6


2024-08-30 08:49:18.504682: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch 7 loss: 0.6094

Start of epoch 7


2024-08-30 08:49:23.278312: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch 8 loss: 0.6043

Start of epoch 8


2024-08-30 08:49:28.707596: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch 9 loss: 0.6044

Start of epoch 9


2024-08-30 08:49:35.146343: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch 10 loss: 0.6026

Start of epoch 10


2024-08-30 08:49:41.273183: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch 11 loss: 0.5987

Start of epoch 11


2024-08-30 08:49:47.037734: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch 12 loss: 0.5977

Start of epoch 12


2024-08-30 08:49:52.911300: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch 13 loss: 0.5956

Start of epoch 13


2024-08-30 08:49:58.279909: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch 14 loss: 0.5950

Start of epoch 14


2024-08-30 08:50:03.693036: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch 15 loss: 0.5965

Start of epoch 15


2024-08-30 08:50:09.307259: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch 16 loss: 0.5927

Start of epoch 16


2024-08-30 08:50:15.191296: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch 17 loss: 0.5904

Start of epoch 17
Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x7f8046089300>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "/Users/akap5486/miniconda3/envs/hydroml/lib/python3.10/site-packages/keras/src/backend/tensorflow/rnn.py", line 418, in <genexpr>
    output_ta_t = tuple(  File "/Users/akap5486/miniconda3/envs/hydroml/lib/python3.10/site-packages/tensorflow/python/util/tf_should_use.py", line 288, in wrapped


KeyboardInterrupt: 

tf.Tensor(0.1734502, shape=(), dtype=float32)


2024-08-29 21:13:47.167967: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [None]:
gr4j_logfile = 'results/gr4j/result.csv'
prod = ProductionStorage()
hybrid_ds = HybridDataset(data_dir=camels_dir, gr4j_logfile=gr4j_logfile, 
                          station_list=['314213'],
                          # state_outlet='WA', map_zone=50,
                          prod=prod, window_size=window_size)

In [None]:
train_ds, test_ds = hybrid_ds.get_datasets(batch_size=256)

In [None]:
for batch in train_ds:
    print(batch['station_id'].shape, batch['timeseries'].shape, batch['static'].shape, batch['target'].shape)
    break

(256, 1) (256, 7, 8) (256, 7) (256, 1)
