In [75]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import init_ops
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from zipfile import ZipFile
import os

In [10]:
# Fetch the zipped Jena climate dataset and unpack it into the current
# working directory, then load the CSV into a DataFrame.
uri = "https://storage.googleapis.com/tensorflow/tf-keras-datasets/jena_climate_2009_2016.csv.zip"
zip_path = keras.utils.get_file(origin=uri, fname="jena_climate_2009_2016.csv.zip")
# The context manager closes the archive handle as soon as extraction is done
# (the handle was previously left open for the lifetime of the kernel).
with ZipFile(zip_path) as zip_file:
    zip_file.extractall()
csv_path = "jena_climate_2009_2016.csv"

df = pd.read_csv(csv_path)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/jena_climate_2009_2016.csv.zip


In [14]:
# Human-readable labels, one per entry of `feature_keys` (same order).
titles = [
    "Pressure",
    "Temperature",
    "Temperature in Kelvin",
    "Temperature (dew point)",
    "Relative Humidity",
    "Saturation vapor pressure",
    "Vapor pressure",
    "Vapor pressure deficit",
    "Specific humidity",
    "Water vapor concentration",
    # `rho (g/m**3)` is the air density; the previous label "Airtight" was wrong.
    "Air density",
    "Wind speed",
    "Maximum wind speed",
    "Wind direction in degrees",
]

# Column names as they appear in the CSV header.
feature_keys = [
    "p (mbar)",
    "T (degC)",
    "Tpot (K)",
    "Tdew (degC)",
    "rh (%)",
    "VPmax (mbar)",
    "VPact (mbar)",
    "VPdef (mbar)",
    "sh (g/kg)",
    "H2OC (mmol/mol)",
    "rho (g/m**3)",
    "wv (m/s)",
    "max. wv (m/s)",
    "wd (deg)",
]

# Colour names available for plotting.
colors = [
    "blue",
    "orange",
    "green",
    "red",
    "purple",
    "brown",
    "pink",
    "gray",
    "olive",
    "cyan",
]

# Name of the timestamp column in the CSV.
date_time_key = "Date Time"

In [12]:
# --- Train / validation split ------------------------------------------------
# Leading fraction of the rows used for training; the rest is held out.
split_fraction = 0.715
train_split = int(split_fraction * len(df))

# --- Windowing ---------------------------------------------------------------
# Sampling stride inside a window (passed as `sampling_rate` further down).
step = 6
# `past` rows form the input window; labels are offset by `past + future` rows.
past = 720
future = 72

# --- Optimisation ------------------------------------------------------------
learning_rate = 0.001
batch_size = 256
epochs = 10


def normalize(data, train_split):
    """Standardize each column using statistics of the training rows only.

    Args:
        data: Array-like with rows along axis 0 that supports slicing and
            ``mean`` / ``std`` with ``axis=0`` (e.g. a 2-D NumPy array).
        train_split: Number of leading rows that belong to the training set.
            Only these rows contribute to the mean and standard deviation.

    Returns:
        ``data`` with the training mean subtracted and divided by the training
        standard deviation, column by column. A column that is constant over
        the training rows (std == 0) is only centered, instead of turning into
        NaN/inf through a division by zero.
    """
    train_rows = data[:train_split]
    data_mean = train_rows.mean(axis=0)
    data_std = train_rows.std(axis=0)
    # Guard against zero variance: scale such columns by 1 (i.e. not at all).
    safe_std = np.where(data_std == 0, 1.0, data_std)
    return (data - data_mean) / safe_std

In [15]:
# Positions (shared by `titles` and `feature_keys`) of the columns fed to the
# models. Kept in one place so the printed labels and the selected columns
# cannot drift apart.
selected_indices = [0, 1, 5, 7, 8, 10, 11]
print(
    "The selected parameters are:",
    ", ".join([titles[i] for i in selected_indices]),
)
selected_features = [feature_keys[i] for i in selected_indices]

# Standardize with training-set statistics. Wrapping the resulting array in a
# fresh DataFrame gives integer column labels (0..6) and a default integer
# index, which the label-based slices below rely on.
features = normalize(df[selected_features].values, train_split)
features = pd.DataFrame(features)

# `.loc` slices are inclusive on both ends, hence `train_split - 1`.
train_data = features.loc[0 : train_split - 1]
val_data = features.loc[train_split:]

The selected parameters are: Pressure, Temperature, Saturation vapor pressure, Vapor pressure deficit, Specific humidity, Airtight, Wind speed


In [16]:
# Labels are shifted `past + future` rows relative to the first input row.
start = past + future
end = start + train_split

# Inputs: all seven selected feature columns of the training rows.
x_train = train_data[list(range(7))].values
# Targets: column 1 (the second selected feature, "T (degC)"), kept 2-D.
y_train = features.iloc[start:end][[1]]

# Number of samples per window after striding by `step`.
sequence_length = int(past / step)

In [17]:
# Sliding-window training dataset: each sample holds `sequence_length` rows
# taken every `step` rows, paired with the target at the window's start index.
dataset_train = keras.preprocessing.timeseries_dataset_from_array(
    data=x_train,
    targets=y_train,
    sequence_length=sequence_length,
    sampling_rate=step,
    batch_size=batch_size,
)

In [18]:
# Validation labels start `past + future` rows after the first validation row,
# so the last `past + future` validation rows cannot be used as inputs.
label_start = train_split + past + future
x_end = len(val_data) - past - future

x_val = val_data.iloc[:x_end][list(range(7))].values
y_val = features.iloc[label_start:][[1]]

dataset_val = keras.preprocessing.timeseries_dataset_from_array(
    data=x_val,
    targets=y_val,
    sequence_length=sequence_length,
    sampling_rate=step,
    batch_size=batch_size,
)


# Pull one training batch to inspect the tensor shapes the models will see.
batch = next(iter(dataset_train.take(1)))
inputs, targets = batch

print("Input shape:", inputs.numpy().shape)
print("Target shape:", targets.numpy().shape)

Input shape: (256, 120, 7)
Target shape: (256, 1)


Training with ANN.

In [38]:
# Baseline: the custom `Linear` layer followed by a one-unit Dense head.
# The symbolic input gets its own name so the sample batch stored in `inputs`
# is not overwritten by this cell.
model_inputs = keras.layers.Input(shape=(inputs.shape[1], inputs.shape[2]))
linear_out = Linear(4)(model_inputs)
outputs = keras.layers.Dense(1)(linear_out)
# NOTE(review): the recorded summary reports an output of (None, 120, 1) while
# a target batch is (256, 1), i.e. one prediction per timestep is compared
# against a single label — confirm this is intended.

model = keras.Model(inputs=model_inputs, outputs=outputs)
model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate), loss="mse")
model.summary()

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_9 (InputLayer)        [(None, 120, 7)]          0         
                                                                 
 linear_2 (Linear)           (None, 120, 4)            32        
                                                                 
 dense_4 (Dense)             (None, 120, 1)            5         
                                                                 
Total params: 37
Trainable params: 37
Non-trainable params: 0
_________________________________________________________________


In [39]:
# Train the model built in the previous cell; the History object returned by
# `fit` is left as the cell's displayed value.
model.fit(x=dataset_train, validation_data=dataset_val, epochs=epochs)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x21e8963bf98>

Training with LSTM.

In [40]:
# Plain LSTM variant: the custom `LSTM` layer (4 units) plus a Dense head.
# The symbolic input gets its own name so `inputs` is not rebound by this cell.
model_inputs = keras.layers.Input(shape=(inputs.shape[1], inputs.shape[2]))
# The layer returns (hidden output, concatenated state); only the former is used.
lstm_out, _ = LSTM(4)(model_inputs)
outputs = keras.layers.Dense(1)(lstm_out)
# NOTE(review): the recorded summary reports an output of (None, 120, 1) while
# a target batch is (256, 1) — confirm the per-timestep output is intended.

model = keras.Model(inputs=model_inputs, outputs=outputs)
model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate), loss="mse")
model.summary()

Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_10 (InputLayer)       [(None, 120, 7)]          0         
                                                                 
 lstm_6 (LSTM)               ((None, 120, 4),          192       
                              (None, 240, 4))                    
                                                                 
 dense_5 (Dense)             (None, 120, 1)            5         
                                                                 
Total params: 197
Trainable params: 197
Non-trainable params: 0
_________________________________________________________________


In [41]:
# Train the plain-LSTM model; the returned History object is the cell output.
model.fit(x=dataset_train, validation_data=dataset_val, epochs=epochs)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x21e896338d0>

In [43]:
# LSTM variant with the input/hidden gating rounds enabled (`trans=True`),
# using 5 rounds instead of the default 3.
# The symbolic input gets its own name so `inputs` is not rebound by this cell.
model_inputs = keras.layers.Input(shape=(inputs.shape[1], inputs.shape[2]))
# The layer returns (hidden output, concatenated state); only the former is used.
lstm_out, _ = LSTM(4, trans=True, iters=5)(model_inputs)
outputs = keras.layers.Dense(1)(lstm_out)
# NOTE(review): the recorded summary reports an output of (None, 120, 1) while
# a target batch is (256, 1) — confirm the per-timestep output is intended.

model = keras.Model(inputs=model_inputs, outputs=outputs)
model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate), loss="mse")
model.summary()

Model: "model_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_11 (InputLayer)       [(None, 120, 7)]          0         
                                                                 
 lstm_7 (LSTM)               ((None, 120, 4),          248       
                              (None, 240, 4))                    
                                                                 
 dense_6 (Dense)             (None, 120, 1)            5         
                                                                 
Total params: 253
Trainable params: 253
Non-trainable params: 0
_________________________________________________________________


In [44]:
# Train the `trans=True` LSTM model; the returned History object is the cell output.
model.fit(x=dataset_train, validation_data=dataset_val, epochs=epochs)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x21e8b118b38>

In [77]:
# Residual Recurrent Unit variant: the custom `RRU` layer (4 units, default
# `q` and `dropout_rate`) plus a Dense head.
# The symbolic input gets its own name so `inputs` is not rebound by this cell.
model_inputs = keras.layers.Input(shape=(inputs.shape[1], inputs.shape[2]))
# The layer returns its new hidden state twice; only the first copy is used.
rru_out, _ = RRU(units=4)(model_inputs)
outputs = keras.layers.Dense(1)(rru_out)
# NOTE(review): the recorded summary reports an output of (None, 120, 1) while
# a target batch is (256, 1) — confirm the per-timestep output is intended.

model = keras.Model(inputs=model_inputs, outputs=outputs)
model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate), loss="mse")
model.summary()

Model: "model_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_19 (InputLayer)       [(None, 120, 7)]          0         
                                                                 
 rru_7 (RRU)                 ((None, 120, 4),          287       
                              (None, 120, 4))                    
                                                                 
 dense_11 (Dense)            (None, 120, 1)            5         
                                                                 
Total params: 292
Trainable params: 292
Non-trainable params: 0
_________________________________________________________________


In [78]:
# Train the RRU model; the returned History object is the cell output.
model.fit(x=dataset_train, validation_data=dataset_val, epochs=epochs)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x21e8e82fcf8>

# Code Repo

### Code Snippet A: Adam Optimizer.

In [2]:
opt = tf.keras.optimizers.Adam(learning_rate=0.1)
var1 = tf.Variable(10.0)


def loss():
    """Quadratic objective in `var1`; d(loss)/d(var1) == var1."""
    return (var1 ** 2) / 2.0


# `minimize` takes a zero-argument callable and the variables to update.
step_count = opt.minimize(loss, [var1]).numpy()
# The first step is `-learning_rate*sign(grad)`
var1.numpy()

9.9

### Code Snippet B: Layer Interface.

In [4]:
class Linear(keras.layers.Layer):
    """Affine layer computing ``inputs @ w + b`` over the last input axis.

    Args:
        units: Size of the output's last dimension.
        **kwargs: Standard Keras layer arguments (e.g. ``name``, ``dtype``),
            forwarded to ``keras.layers.Layer``.
    """

    def __init__(self, units=32, **kwargs):
        # Forward kwargs so callers can set `name`, `dtype`, ... like on any
        # built-in layer.
        super().__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        """Create the kernel ``w`` and bias ``b`` once the input width is known."""
        self.w = self.add_weight(
            shape=(input_shape[-1], self.units),
            initializer="random_normal",
            trainable=True,
        )
        self.b = self.add_weight(
            shape=(self.units,), initializer="random_normal", trainable=True
        )

    def call(self, inputs):
        """Return ``inputs @ w + b``; the bias broadcasts over leading axes."""
        return tf.matmul(inputs, self.w) + self.b

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({"units": self.units})
        return config

### Code Snippet C: Naive LSTM

In [42]:
class LSTM(keras.layers.Layer):
    '''
    A Naive LSTM Cell Implementation.

    @link tensorflow/tensorflow/python/keras/layers/legacy_rnn/rnn_cell_impl.py#BasicLSTMCell
    @link https://github.com/piEsposito/pytorch-lstm-by-hand/blob/master/LSTM.ipynb
    @date MAR-29-2022
    @note straightforward, in sacrifice of efficiency

    `call` performs a single gate update on the whole input tensor; it does
    not loop over any axis of the input.

    Args:
        units: Width of the hidden and cell states.
        trans: When True, run the alternating input/hidden gating rounds of
            `mogrify` before the gate computations (adds weights Q and R).
        iters: Number of gating rounds used when `trans` is True.
        **kwargs: Standard Keras layer arguments (e.g. ``name``), forwarded to
            ``keras.layers.Layer``.
    '''

    def __init__(self, units=32, trans=False, iters=3, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.trans = trans
        self.iters = iters

    def build(self, input_shape):
        """Create the per-gate weights (W: input, U: hidden, b: bias)."""
        in_dim = input_shape[-1]

        def new_weight(*shape):
            # Every weight uses the same initializer and is trainable.
            return self.add_weight(shape=shape, initializer="random_normal", trainable=True)

        # Creation order is kept (i, f, c, o, then Q, R) so the layer's weight
        # list is ordered as before.
        self.W_i = new_weight(in_dim, self.units)
        self.U_i = new_weight(self.units, self.units)
        self.b_i = new_weight(self.units)

        self.W_f = new_weight(in_dim, self.units)
        self.U_f = new_weight(self.units, self.units)
        self.b_f = new_weight(self.units)

        self.W_c = new_weight(in_dim, self.units)
        self.U_c = new_weight(self.units, self.units)
        self.b_c = new_weight(self.units)

        self.W_o = new_weight(in_dim, self.units)
        self.U_o = new_weight(self.units, self.units)
        self.b_o = new_weight(self.units)

        if self.trans:
            # Q maps hidden -> input width, R maps input -> hidden width.
            self.Q = new_weight(self.units, in_dim)
            self.R = new_weight(in_dim, self.units)

    def mogrify(self, x_t, h_t):
        """Alternately rescale `x_t` and `h_t` by gates computed from each other.

        Odd rounds scale `x_t` by ``2 * sigmoid(h_t @ Q)``; even rounds scale
        `h_t` by ``2 * sigmoid(x_t @ R)``. Runs `self.iters` rounds and returns
        the updated ``(x_t, h_t)`` pair.
        """
        sigmoid = math_ops.sigmoid
        for i in range(1, self.iters + 1):
            if i % 2 == 0:
                h_t = (2 * sigmoid(x_t @ self.R)) * h_t
            else:
                x_t = (2 * sigmoid(h_t @ self.Q)) * x_t
        return x_t, h_t

    # Backward-compatible alias for the original, misspelled method name.
    mogrigy = mogrify

    def call(self, inputs, init_states=None):
        """Apply one LSTM gate update to `inputs`.

        Args:
            inputs: Input tensor; its last axis must match the width seen in
                `build`. Axis 1 is read to size the default zero states, so
                the tensor needs at least two axes.
            init_states: Optional tensor holding the hidden and cell states
                concatenated along axis 1; it is split into two equal halves.

        Returns:
            ``(h_t, new_state)`` where `new_state` is `h_t` and `c_t`
            concatenated along axis 1.
        """
        sigmoid = math_ops.sigmoid
        tanh = math_ops.tanh
        if init_states is None:
            # NOTE(review): the zero states are sized by axis 1 of the input
            # (120 in the recorded model summaries), not by the leading batch
            # axis; they broadcast against the batched projections below.
            rows = inputs.shape[1]
            h_t = tf.zeros((rows, self.units))
            c_t = tf.zeros((rows, self.units))
        else:
            h_t, c_t = array_ops.split(value=init_states, num_or_size_splits=2, axis=1)

        x_t = inputs

        if self.trans:
            x_t, h_t = self.mogrify(x_t, h_t)

        i_t = sigmoid(x_t @ self.W_i + h_t @ self.U_i + self.b_i)  # input gate
        f_t = sigmoid(x_t @ self.W_f + h_t @ self.U_f + self.b_f)  # forget gate
        g_t = tanh(x_t @ self.W_c + h_t @ self.U_c + self.b_c)     # candidate
        o_t = sigmoid(x_t @ self.W_o + h_t @ self.U_o + self.b_o)  # output gate
        c_t = f_t * c_t + i_t * g_t
        h_t = o_t * tanh(c_t)

        new_state = array_ops.concat([h_t, c_t], 1)
        return h_t, new_state

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({"units": self.units, "trans": self.trans, "iters": self.iters})
        return config

### Code Snippet D: Residual Recurrent Unit

In [76]:
class RRU(keras.layers.Layer):
    """Residual-Recurrent-Unit style layer applied as a single update step.

    `call` performs one update on the whole input tensor; it does not loop
    over any axis of the input.

    Args:
        units: Width ``n`` of the hidden state.
        q: Multiplier for the inner width ``g = round(q * (m + n))``, where
            ``m`` is the input's last dimension.
        dropout_rate: Dropout rate applied to the inner activation. Dropout is
            active only while training.
        **kwargs: Standard Keras layer arguments (e.g. ``name``), forwarded to
            ``keras.layers.Layer``.
    """

    def __init__(self, units=32, q=1.0, dropout_rate=0.5, **kwargs):
        super().__init__(**kwargs)
        self.n = units
        self.p = 1
        self.q = q
        self.dropout_rate = dropout_rate
        # A Dropout sub-layer (rather than a bare `tf.nn.dropout` call) honours
        # the `training` flag, so validation and inference are deterministic.
        self.dropout = keras.layers.Dropout(dropout_rate)

    def instance_norm(self, cur):
        """Divide `cur` by its root-mean-square over the last axis (no centering)."""
        variance = tf.reduce_mean(tf.square(cur), [-1], keepdims=True)
        # The epsilon keeps rsqrt finite for all-zero rows.
        cur = cur * tf.math.rsqrt(variance + 1e-6)
        return cur

    def inv_sigmoid(self, y):
        """Return the logit ``log(y / (1 - y))`` of `y` (NumPy, element-wise)."""
        return np.log(y / (1 - y))

    def build(self, input_shape):
        """Create the weights once the input width ``m`` is known."""
        self.m = input_shape[-1]
        self.g = round(self.q * (self.m + self.n))

        self.W_x = self.add_weight(shape=(self.m, self.g), initializer="random_normal", trainable=True)
        self.W_h = self.add_weight(shape=(self.n, self.g), initializer="random_normal", trainable=True)
        self.B_j = self.add_weight(shape=(self.g,       ), initializer=tf.zeros_initializer(), trainable=True)

        self.W_k = self.add_weight(shape=(self.g, self.g), initializer="random_normal", trainable=True)
        self.B_k = self.add_weight(shape=(self.g,       ), initializer=tf.zeros_initializer(), trainable=True)

        # NOTE(review): W_c has `p` (= 1) output columns while B_c has `n`
        # entries, so `d @ W_c + B_c` in `call` broadcasts a single projected
        # value across all `n` hidden units. The commented-out W_o/B_o below
        # use the opposite pairing (g x n, p) — confirm the shapes were not
        # swapped. Left unchanged to keep the parameter count.
        self.W_c = self.add_weight(shape=(self.g, self.p), initializer="random_normal", trainable=True)
        self.B_c = self.add_weight(shape=(self.n,       ), initializer=tf.zeros_initializer(), trainable=True)

        # self.W_o = self.add_weight(shape=(self.g, self.n), initializer="random_normal", trainable=True)
        # self.B_o = self.add_weight(shape=(self.p,       ), initializer=tf.zeros_initializer(), trainable=True)

        # S is initialised to logit(u) / q with u ~ U(0.01, 0.99), so that
        # sigmoid(S) (the carry weight used in `call`) starts spread over (0, 1)
        # when q == 1.
        self.S = self.add_weight(shape=(self.n,       ), initializer=init_ops.constant_initializer(self.inv_sigmoid(np.random.uniform(0.01, 0.99, size=self.n)) / self.q), trainable=True)
        # Z starts at zero, so the candidate contributes nothing initially.
        self.Z = self.add_weight(shape=(self.n,       ), initializer=tf.zeros_initializer(), trainable=True)

    def call(self, inputs, init_states=None, training=None):
        """Apply one residual update to the hidden state.

        Args:
            inputs: Input tensor whose last axis has width ``m``. Axis 1 is
                read to size the default zero state, so the tensor needs at
                least two axes.
            init_states: Optional initial hidden state; zeros when omitted.
            training: Keras training flag. Dropout is applied only when this
                is truthy; Keras supplies it automatically during
                ``fit`` / ``evaluate`` / ``predict``.

        Returns:
            ``(h_t, h_t)``: the new hidden state, returned twice.
        """
        sigmoid = math_ops.sigmoid
        relu = tf.nn.relu
        if init_states is None:
            # NOTE(review): sized by axis 1 of the input (120 in the recorded
            # model summary), not by the leading batch axis.
            rows = inputs.shape[1]
            h_t = tf.zeros((rows, self.n))
        else:
            h_t = init_states
        x_t = inputs
        j = relu(self.instance_norm(x_t @ self.W_x + h_t @ self.W_h + self.B_j))
        j = relu(j @ self.W_k + self.B_k)
        d = self.dropout(j, training=training)
        c = d @ self.W_c + self.B_c
        # Residual update: scaled carry of the old state plus scaled candidate.
        h_t = h_t * sigmoid(self.S) + c * self.Z
        # o_t = d @ self.W_o + self.B_o
        return h_t, h_t

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({"units": self.n, "q": self.q, "dropout_rate": self.dropout_rate})
        return config