In [2]:
import tensorflow as tf

In [26]:
import pandas as pd
from pathlib import Path

In [27]:
import matplotlib.pyplot as plt

In [47]:
def fit_and_evaluate(model, train_set, valid_set, learning_rate, epochs=500):
    early_stopping_cb = tf.keras.callbacks.EarlyStopping(
        monitor="val_mae", patience=50, restore_best_weights=True
    )
    opt = tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=0.9)
    model.compile(loss=tf.keras.losses.Huber(), optimizer=opt, metrics=["mae"])
    history = model.fit(train_set, validation_data=valid_set, epochs=epochs,callbacks=[early_stopping_cb])
    valid_loss, valid_mae = model.evaluate(valid_set)
    return valid_mae*1e6

In [28]:
ridership = r"C:\Users\blais\Documents\ML\data\ridership\CTA_-_Ridership_-_Daily_Boarding_Totals.csv"
df = pd.read_csv(ridership, parse_dates=["service_date"])
df.columns = ["date", "day_type", "bus", "rail", "total"]
df = df.sort_values("date").set_index("date")
df = df.drop("total", axis=1)
df = df.drop_duplicates()

In [29]:
df_mulvar = df[['bus','rail']]/1e6 # use both bus and rail as input
df_mulvar['next_day_type'] = df['day_type'].shift(-1) # we know tomorrow's type
df_mulvar = pd.get_dummies(df_mulvar) # one-hot encode the day type

In [30]:
mulvar_train = df_mulvar["2016-01":"2018-12"]
mulvar_valid = df_mulvar["2019-01":"2019-05"]
mulvar_test = df_mulvar["2019-06":]

In [31]:
mulvar_train.iloc[:, 2:] = mulvar_train.iloc[:,2:].astype('int')
mulvar_valid.iloc[:, 2:] = mulvar_valid.iloc[:,2:].astype('int')

2016-01-01    1
2016-01-02    0
2016-01-03    0
2016-01-04    0
2016-01-05    0
             ..
2018-12-27    0
2018-12-28    1
2018-12-29    0
2018-12-30    0
2018-12-31    0
Name: next_day_type_A, Length: 1096, dtype: int32' has dtype incompatible with bool, please explicitly cast to a compatible dtype first.
  mulvar_train.iloc[:, 2:] = mulvar_train.iloc[:,2:].astype('int')
2016-01-01    0
2016-01-02    1
2016-01-03    0
2016-01-04    0
2016-01-05    0
             ..
2018-12-27    0
2018-12-28    0
2018-12-29    1
2018-12-30    0
2018-12-31    1
Name: next_day_type_U, Length: 1096, dtype: int32' has dtype incompatible with bool, please explicitly cast to a compatible dtype first.
  mulvar_train.iloc[:, 2:] = mulvar_train.iloc[:,2:].astype('int')
2016-01-01    0
2016-01-02    0
2016-01-03    1
2016-01-04    1
2016-01-05    1
             ..
2018-12-27    1
2018-12-28    0
2018-12-29    0
2018-12-30    1
2018-12-31    0
Name: next_day_type_W, Length: 1096, dtype: int32' has dtype inc

Forecasting using a sequence to sequence model:

In [3]:
datax = tf.data.Dataset.range(12)

In [9]:
t1 = datax.window(3, shift=1, drop_remainder=True)

In [10]:
t1 = t1.flat_map(lambda x: x.batch(3))

In [11]:
for x in t1:
    print(x)

tf.Tensor([0 1 2], shape=(3,), dtype=int64)
tf.Tensor([1 2 3], shape=(3,), dtype=int64)
tf.Tensor([2 3 4], shape=(3,), dtype=int64)
tf.Tensor([3 4 5], shape=(3,), dtype=int64)
tf.Tensor([4 5 6], shape=(3,), dtype=int64)
tf.Tensor([5 6 7], shape=(3,), dtype=int64)
tf.Tensor([6 7 8], shape=(3,), dtype=int64)
tf.Tensor([7 8 9], shape=(3,), dtype=int64)
tf.Tensor([ 8  9 10], shape=(3,), dtype=int64)
tf.Tensor([ 9 10 11], shape=(3,), dtype=int64)


In [12]:
t2 = t1.window(4, shift=1, drop_remainder=True)

In [13]:
for x in t2:
    for y in x:
        print(y, end=" ")
    print()

tf.Tensor([0 1 2], shape=(3,), dtype=int64) tf.Tensor([1 2 3], shape=(3,), dtype=int64) tf.Tensor([2 3 4], shape=(3,), dtype=int64) tf.Tensor([3 4 5], shape=(3,), dtype=int64) 
tf.Tensor([1 2 3], shape=(3,), dtype=int64) tf.Tensor([2 3 4], shape=(3,), dtype=int64) tf.Tensor([3 4 5], shape=(3,), dtype=int64) tf.Tensor([4 5 6], shape=(3,), dtype=int64) 
tf.Tensor([2 3 4], shape=(3,), dtype=int64) tf.Tensor([3 4 5], shape=(3,), dtype=int64) tf.Tensor([4 5 6], shape=(3,), dtype=int64) tf.Tensor([5 6 7], shape=(3,), dtype=int64) 
tf.Tensor([3 4 5], shape=(3,), dtype=int64) tf.Tensor([4 5 6], shape=(3,), dtype=int64) tf.Tensor([5 6 7], shape=(3,), dtype=int64) tf.Tensor([6 7 8], shape=(3,), dtype=int64) 
tf.Tensor([4 5 6], shape=(3,), dtype=int64) tf.Tensor([5 6 7], shape=(3,), dtype=int64) tf.Tensor([6 7 8], shape=(3,), dtype=int64) tf.Tensor([7 8 9], shape=(3,), dtype=int64) 
tf.Tensor([5 6 7], shape=(3,), dtype=int64) tf.Tensor([6 7 8], shape=(3,), dtype=int64) tf.Tensor([7 8 9], shape=(3

In [14]:
t2 = t2.flat_map(lambda x: x.batch(4))

In [15]:
for x in t2:
    print(x)

tf.Tensor(
[[0 1 2]
 [1 2 3]
 [2 3 4]
 [3 4 5]], shape=(4, 3), dtype=int64)
tf.Tensor(
[[1 2 3]
 [2 3 4]
 [3 4 5]
 [4 5 6]], shape=(4, 3), dtype=int64)
tf.Tensor(
[[2 3 4]
 [3 4 5]
 [4 5 6]
 [5 6 7]], shape=(4, 3), dtype=int64)
tf.Tensor(
[[3 4 5]
 [4 5 6]
 [5 6 7]
 [6 7 8]], shape=(4, 3), dtype=int64)
tf.Tensor(
[[4 5 6]
 [5 6 7]
 [6 7 8]
 [7 8 9]], shape=(4, 3), dtype=int64)
tf.Tensor(
[[ 5  6  7]
 [ 6  7  8]
 [ 7  8  9]
 [ 8  9 10]], shape=(4, 3), dtype=int64)
tf.Tensor(
[[ 6  7  8]
 [ 7  8  9]
 [ 8  9 10]
 [ 9 10 11]], shape=(4, 3), dtype=int64)


In [16]:
def to_windows(dataset, length):
    dataset = dataset.window(length, shift=1, drop_remainder=True)
    return dataset.flat_map(lambda x: x.batch(length))

In [20]:
my_series = tf.data.Dataset.range(7)

dataset = to_windows(to_windows(my_series, 3), 4)

In [21]:
list(dataset)

[<tf.Tensor: shape=(4, 3), dtype=int64, numpy=
 array([[0, 1, 2],
        [1, 2, 3],
        [2, 3, 4],
        [3, 4, 5]], dtype=int64)>,
 <tf.Tensor: shape=(4, 3), dtype=int64, numpy=
 array([[1, 2, 3],
        [2, 3, 4],
        [3, 4, 5],
        [4, 5, 6]], dtype=int64)>]

In [23]:
dataset = dataset.map(lambda S: (S[:,0], S[:,1:]))
list(dataset)

[(<tf.Tensor: shape=(4,), dtype=int64, numpy=array([0, 1, 2, 3], dtype=int64)>,
  <tf.Tensor: shape=(4, 2), dtype=int64, numpy=
  array([[1, 2],
         [2, 3],
         [3, 4],
         [4, 5]], dtype=int64)>),
 (<tf.Tensor: shape=(4,), dtype=int64, numpy=array([1, 2, 3, 4], dtype=int64)>,
  <tf.Tensor: shape=(4, 2), dtype=int64, numpy=
  array([[2, 3],
         [3, 4],
         [4, 5],
         [5, 6]], dtype=int64)>)]

Creating a utility function to prepare the datasets for our sequence-to-sequence model. It will also take care of shuffling (optional) and batching:

In [24]:
def to_seq2seq_dataset(series, seq_length=56, ahead=14, target_col=1, batch_size=32, shuffle=False, seed=None):
    ds = to_windows(tf.data.Dataset.from_tensor_slices(series), ahead+1)
    ds = to_windows(ds, seq_length).map(lambda S: (S[:,0],S[:,1:,1]))
    if shuffle:
        ds = ds.shuffle(8*batch_size, seed=seed)
    return ds.batch(batch_size)

Now - create the datasets:

In [40]:
seq2seq_train = to_seq2seq_dataset(mulvar_train, shuffle=True, seed=42)
seq2seq_valid = to_seq2seq_dataset(mulvar_valid)

In [44]:
for x,y in seq2seq_train:
    print(x.shape)
    print(y.shape)
    break

(32, 56, 5)
(32, 56, 14)


In [45]:
for x,y in seq2seq_valid:
    print(x.shape)
    print(y.shape)
    break

(32, 56, 5)
(32, 56, 14)


and lastly, build the sequence-to-sequence model:

In [41]:
seq2seq_model = tf.keras.Sequential([
    tf.keras.layers.SimpleRNN(32, return_sequences=True, input_shape=[None, 5]),
    tf.keras.layers.Dense(14)
])

  super().__init__(**kwargs)


In [43]:
seq2seq_model.summary()

In [48]:
fit_and_evaluate(seq2seq_model, seq2seq_train, seq2seq_valid, learning_rate=0.1)

Epoch 1/500
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 0.1074 - mae: 0.3518 - val_loss: 0.0172 - val_mae: 0.1400




Epoch 2/500
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0126 - mae: 0.1208 - val_loss: 0.0088 - val_mae: 0.0939
Epoch 3/500
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0077 - mae: 0.0884 - val_loss: 0.0069 - val_mae: 0.0794
Epoch 4/500
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0063 - mae: 0.0769 - val_loss: 0.0062 - val_mae: 0.0755
Epoch 5/500
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0055 - mae: 0.0702 - val_loss: 0.0055 - val_mae: 0.0690
Epoch 6/500
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0050 - mae: 0.0664 - val_loss: 0.0053 - val_mae: 0.0675
Epoch 7/500
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0047 - mae: 0.0644 - val_loss: 0.0049 - val_mae: 0.0633
Epoch 8/500
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0

47188.665717840195

In [50]:
import numpy as np

In [52]:
X = mulvar_valid.to_numpy()[np.newaxis, :56]

In [53]:
X.shape

(1, 56, 5)

In [54]:
y_pred_14 = seq2seq_model.predict(X)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 91ms/step


In [55]:
y_pred_14.shape

(1, 56, 14)

In [56]:
# after training, only the output of the very last time step matters and the rest can be ignored:
y_pred_14 = y_pred_14[0, -1]

In [57]:
y_pred_14

array([0.70501226, 0.71439546, 0.7199814 , 0.68423545, 0.35979554,
       0.25270474, 0.6364544 , 0.7128575 , 0.731151  , 0.74287456,
       0.68169916, 0.3750486 , 0.28020662, 0.65740323], dtype=float32)

In [None]:
Y_pred_valid = seq2seq_model.predict(seq2seq_valid)

**Layer Normalization:**
- Note: the states argument is a list containing one or more tensors. In the case of a simple RNN cell, it contains a single tensor equal to the outputs of the previous timestep, but other cells have multiple state tensors (e.g. an LSTM cell has a long-term state and a short-term state). A cell must also have a state_size attribute and an output_size attribute. In a simple rnn, both are simply equal to the number of units. the following code implements a custom memory cell that will behave like a SimpleRNNCell, except it will also apply layer normalization at each timestep:

In [59]:
# implementing a custom memory cell that will behave like a SimpleRNNCell, except it will also apply layer normalization at each timestep

class LNSimpleRNNCell(tf.keras.layers.Layer):
    def __init__(self, units, activation='tanh',**kwargs):
        super().__init__(**kwargs)
        self.state_size = units
        self.output_size = units
        self.simple_rnn_cell = tf.keras.layers.SimpleRNNCell(units, activation=None)
        self.layer_norm = tf.keras.layers.LayerNormalization()
        self.activation = tf.keras.activations.get(activation)
    
    def call(self, inputs, states):
        outputs, new_states = self.simple_rnn_cell(inputs, states)
        norm_outputs = self.activation(self.layer_norm(outputs))
        return norm_outputs, [norm_outputs]

To use this custom cell, all we need to do is create a tf.keras.layers.RNN layer, passing it a cell instance:

In [60]:
custom_ln_model = tf.keras.Sequential([
    tf.keras.layers.RNN(LNSimpleRNNCell(32), return_sequences=True, input_shape=[None,5]),
    tf.keras.layers.Dense(14)
])

  super().__init__(**kwargs)


In [61]:
fit_and_evaluate(custom_ln_model, seq2seq_train, seq2seq_valid,
                 learning_rate=0.1, epochs=5)

Epoch 1/5
     32/Unknown [1m2s[0m 14ms/step - loss: 0.1524 - mae: 0.4165



[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 21ms/step - loss: 0.1476 - mae: 0.4079 - val_loss: 0.0180 - val_mae: 0.1446
Epoch 2/5
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - loss: 0.0146 - mae: 0.1408 - val_loss: 0.0143 - val_mae: 0.1184
Epoch 3/5
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.0111 - mae: 0.1191 - val_loss: 0.0122 - val_mae: 0.1086
Epoch 4/5
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.0097 - mae: 0.1077 - val_loss: 0.0108 - val_mae: 0.1023
Epoch 5/5
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.0086 - mae: 0.0991 - val_loss: 0.0090 - val_mae: 0.0937
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0100 - mae: 0.0976


93715.73477983475

**Tackling the Short-Term Memory Problem:**

**LSTM Cells:**

In [64]:
# lstms - performs much better; training converges faster, and it will detect longer-term patterns in the data

tf.random.set_seed(42)
lstm_model = tf.keras.Sequential([
    tf.keras.layers.LSTM(32, return_sequences=True, input_shape=[None, 5]),
    tf.keras.layers.Dense(14)
])

  super().__init__(**kwargs)


In [65]:
fit_and_evaluate(lstm_model, seq2seq_train, seq2seq_valid,
                 learning_rate=0.1, epochs=5)

Epoch 1/5
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 0.0802 - mae: 0.3266 - val_loss: 0.0189 - val_mae: 0.1694




Epoch 2/5
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 0.0187 - mae: 0.1651 - val_loss: 0.0177 - val_mae: 0.1464
Epoch 3/5
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 0.0162 - mae: 0.1530 - val_loss: 0.0168 - val_mae: 0.1415
Epoch 4/5
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 0.0153 - mae: 0.1479 - val_loss: 0.0161 - val_mae: 0.1370
Epoch 5/5
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 0.0144 - mae: 0.1427 - val_loss: 0.0153 - val_mae: 0.1342
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 834us/step - loss: 0.0160 - mae: 0.1364


134229.376912117

Alternatively - could use the general-purpose tf.keras.layers.RNN layer - giving it an LSTMCell as an argument. usually for cases where you want to modify per timestep operations. 

-

**Using 1D Convolutional Layers to process sequences:**
- saw a 2D conv layer works by sliding several fairly small kernels 
- 1D conv layer slides several kernels across a sequence, producing a 1D feature map per kernel
- if you use a 1D conv layer with a stride of 1 and 'same' padding - then the output sequence will have the same length as the input sequence
- if you use 'valid' padding or a stride > 1, then the output sequence
- following model shortens the sequences - by shortening the sequences the conv layer may help the GRU layers detect longer patterns - so we can afford to double the input sequence length to 112. 

In [33]:
t_d = tf.data.Dataset.from_tensor_slices(mulvar_train)

In [36]:
t_d2 = to_windows(t_d, 15)

In [38]:
t_d3 = to_windows(t_d2, 56)

In [39]:
for y in t_d3:
    print(y)

tf.Tensor(
[[[0.303321 0.319835 1.       0.       0.      ]
  [0.448859 0.365509 0.       1.       0.      ]
  [0.34054  0.287661 0.       0.       1.      ]
  ...
  [0.78722  0.716663 0.       0.       1.      ]
  [0.859099 0.758116 0.       0.       1.      ]
  [0.858032 0.742622 1.       0.       0.      ]]

 [[0.448859 0.365509 0.       1.       0.      ]
  [0.34054  0.287661 0.       0.       1.      ]
  [0.829429 0.703185 0.       0.       1.      ]
  ...
  [0.859099 0.758116 0.       0.       1.      ]
  [0.858032 0.742622 1.       0.       0.      ]
  [0.486797 0.404212 0.       1.       0.      ]]

 [[0.34054  0.287661 0.       0.       1.      ]
  [0.829429 0.703185 0.       0.       1.      ]
  [0.846789 0.727716 0.       0.       1.      ]
  ...
  [0.858032 0.742622 1.       0.       0.      ]
  [0.486797 0.404212 0.       1.       0.      ]
  [0.266532 0.24116  0.       0.       1.      ]]

 ...

 [[0.890949 0.765309 0.       0.       1.      ]
  [0.79516  0.725712 0.     

In [37]:
for x in t_d2:
    print(x)

tf.Tensor(
[[0.303321 0.319835 1.       0.       0.      ]
 [0.448859 0.365509 0.       1.       0.      ]
 [0.34054  0.287661 0.       0.       1.      ]
 [0.829429 0.703185 0.       0.       1.      ]
 [0.846789 0.727716 0.       0.       1.      ]
 [0.853904 0.733167 0.       0.       1.      ]
 [0.846274 0.736809 0.       0.       1.      ]
 [0.841494 0.732495 1.       0.       0.      ]
 [0.469539 0.391597 0.       1.       0.      ]
 [0.306896 0.263073 0.       0.       1.      ]
 [0.753006 0.694734 0.       0.       1.      ]
 [0.760149 0.705694 0.       0.       1.      ]
 [0.78722  0.716663 0.       0.       1.      ]
 [0.859099 0.758116 0.       0.       1.      ]
 [0.858032 0.742622 1.       0.       0.      ]], shape=(15, 5), dtype=float64)
tf.Tensor(
[[0.448859 0.365509 0.       1.       0.      ]
 [0.34054  0.287661 0.       0.       1.      ]
 [0.829429 0.703185 0.       0.       1.      ]
 [0.846789 0.727716 0.       0.       1.      ]
 [0.853904 0.733167 0.       0.   

In [35]:
for x in t_d:
    print(x)

tf.Tensor([0.303321 0.319835 1.       0.       0.      ], shape=(5,), dtype=float64)
tf.Tensor([0.448859 0.365509 0.       1.       0.      ], shape=(5,), dtype=float64)
tf.Tensor([0.34054  0.287661 0.       0.       1.      ], shape=(5,), dtype=float64)
tf.Tensor([0.829429 0.703185 0.       0.       1.      ], shape=(5,), dtype=float64)
tf.Tensor([0.846789 0.727716 0.       0.       1.      ], shape=(5,), dtype=float64)
tf.Tensor([0.853904 0.733167 0.       0.       1.      ], shape=(5,), dtype=float64)
tf.Tensor([0.846274 0.736809 0.       0.       1.      ], shape=(5,), dtype=float64)
tf.Tensor([0.841494 0.732495 1.       0.       0.      ], shape=(5,), dtype=float64)
tf.Tensor([0.469539 0.391597 0.       1.       0.      ], shape=(5,), dtype=float64)
tf.Tensor([0.306896 0.263073 0.       0.       1.      ], shape=(5,), dtype=float64)
tf.Tensor([0.753006 0.694734 0.       0.       1.      ], shape=(5,), dtype=float64)
tf.Tensor([0.760149 0.705694 0.       0.       1.      ], shape=(

In [32]:
mulvar_train

Unnamed: 0_level_0,bus,rail,next_day_type_A,next_day_type_U,next_day_type_W
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-01-01,0.303321,0.319835,1,0,0
2016-01-02,0.448859,0.365509,0,1,0
2016-01-03,0.340540,0.287661,0,0,1
2016-01-04,0.829429,0.703185,0,0,1
2016-01-05,0.846789,0.727716,0,0,1
...,...,...,...,...,...
2018-12-27,0.509948,0.453029,0,0,1
2018-12-28,0.577497,0.493961,1,0,0
2018-12-29,0.394088,0.307105,0,1,0
2018-12-30,0.314550,0.265310,0,0,1


In [None]:
seq2seq_train = to_seq2seq_dataset(mulvar)