In [1]:
import pandas as pd
from pathlib import Path
# Load the CTA daily ridership CSV and tidy it into a date-indexed frame with
# the columns day_type, bus and rail.
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs as-is where that file exists — point it at your local copy of the CSV.
path = Path("C:\\Users\\manch\\OneDrive\\Documents\\DEV\\MachineLearning\\datasets\\CTA_-_Ridership_-_Daily_Boarding_Totals_20241230.csv")

df = pd.read_csv(path, parse_dates=["service_date"])
df.columns = ["date", "day_type", "bus", "rail", "total"]  # shorter names
df = df.drop(columns="total")  # no need for total, it's just bus + rail

# Remove repeated records (the original note mentions 2011-10 and 2014-07)
# while "date" is still a regular column. DataFrame.drop_duplicates() ignores
# the index, so deduplicating after set_index("date") would also delete
# distinct days that merely share the same (day_type, bus, rail) values.
df = df.drop_duplicates()

df = df.sort_values("date").set_index("date")
df

Unnamed: 0_level_0,day_type,bus,rail
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2001-01-01,U,297192,126455
2001-01-02,W,780827,501952
2001-01-03,W,824923,536432
2001-01-04,W,870021,550011
2001-01-05,W,890426,557917
...,...,...,...
2024-10-27,U,312965,215594
2024-10-28,W,611041,389359
2024-10-29,W,652674,444706
2024-10-30,W,657942,451915


In [2]:
import tensorflow as tf
import numpy as np

# Multivariate model inputs: the bus and rail series (divided by 1e6) plus the
# *next* day's type, which is known ahead of time.
df_mulvar = (df[["bus", "rail"]] / 1e6).assign(
    next_day_type=df["day_type"].shift(-1)
)

# One-hot encode the day type. Per the original note this yields five columns:
# bus, rail, and one indicator column per day type (W, A and U).
df_mulvar = pd.get_dummies(df_mulvar)

# Split into training, validation and test periods. Every column is cast to
# float32 so that the boolean indicator columns become numeric as well.
mulvar_train = df_mulvar.loc["2016-01":"2018-12"].astype(np.float32)
mulvar_valid = df_mulvar.loc["2019-01":"2019-05"].astype(np.float32)
mulvar_test = df_mulvar.loc["2019-06":].astype(np.float32)

In [3]:
# Build the windowed training and validation datasets.
seq_length = 56
batch_size = 32

# Windowing arguments shared by both datasets.
window_kwargs = dict(sequence_length=seq_length, batch_size=batch_size)

# Inputs are windows over every feature column; only the rail series is
# forecast. Targets start at row `seq_length`, so each window is paired with
# the rail value that immediately follows it.
train_mulvar_ds = tf.keras.utils.timeseries_dataset_from_array(
    mulvar_train.to_numpy(),
    targets=mulvar_train["rail"].iloc[seq_length:].to_numpy(dtype=np.float32),
    shuffle=True,
    seed=42,
    **window_kwargs,
)

# Validation windows use the same settings, without shuffling.
valid_mulvar_ds = tf.keras.utils.timeseries_dataset_from_array(
    mulvar_valid.to_numpy(),
    targets=mulvar_valid["rail"].iloc[seq_length:].to_numpy(dtype=np.float32),
    **window_kwargs,
)


In [4]:
# Train a GRU forecaster on the multivariate windows, plot the learning curves,
# then compare its validation-set predictions with the actual rail values.

# Imported before the first plt.* call so the cell runs on a fresh kernel.
import matplotlib.pyplot as plt

tf.random.set_seed(42)  # make weight initialisation repeatable between runs

# Create the GRU RNN. An explicit Input replaces the deprecated `input_shape=`
# layer argument: sequences of any length, 5 features per time step.
mulvar_model = tf.keras.Sequential(
    [
        tf.keras.Input(shape=[None, 5]),
        tf.keras.layers.GRU(32),
        tf.keras.layers.Dense(1),
    ]
)

opt = tf.keras.optimizers.SGD(learning_rate=0.02, momentum=0.9)

mulvar_model.compile(loss=tf.keras.losses.Huber(), optimizer=opt, metrics=["mae"])

# Stop once validation MAE has not improved for 50 epochs and roll back to the
# best weights seen.
early_stopping_cb = tf.keras.callbacks.EarlyStopping(
    monitor="val_mae", patience=50, restore_best_weights=True
)

history_mulvar_model = mulvar_model.fit(
    train_mulvar_ds,
    validation_data=valid_mulvar_ds,
    epochs=500,
    callbacks=[early_stopping_cb],
)

# Get MAE values for training and validation
mae_values = history_mulvar_model.history['mae']  # training MAE
val_mae_values = history_mulvar_model.history['val_mae']  # validation MAE

# Get loss values
loss_values = history_mulvar_model.history['loss']  # training loss
val_loss_values = history_mulvar_model.history['val_loss']  # validation loss

plt.figure(figsize=(10, 6))
plt.plot(mae_values, label='Training MAE')
plt.plot(val_mae_values, label='Validation MAE')
plt.title('Model MAE over Epochs')
plt.xlabel('Epoch')
plt.ylabel('MAE')
plt.legend()
plt.grid(True)
plt.show()

# Get predictions. NOTE: despite the variable name, these are predictions on
# the *validation* dataset, not on the test split.
test_predictions = mulvar_model.predict(valid_mulvar_ds)

# Since we're using a sequence length of 56, our predictions start at index 56
# We need to align the predictions with the actual values
actual_values = mulvar_valid["rail"].iloc[seq_length:].values

# Mean absolute error rescaled by 1e6, i.e. back in the units of the raw rail
# column. The result is a one-element pandas Series.
mae = (pd.DataFrame(test_predictions * 1e6) - pd.DataFrame(actual_values * 1e6)).abs().mean()

plt.figure(figsize=(10, 6))
plt.plot(test_predictions, label='Predicted values')
plt.plot(actual_values, label='Actual Values')
plt.title('Rail ridership: validation predictions vs. actual values')
plt.xlabel('Validation target index (days)')
plt.ylabel('Rail ridership (millions)')
plt.legend()
plt.grid(True)
plt.show()

Epoch 1/500


  super().__init__(**kwargs)


[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 35ms/step - loss: 0.1005 - mae: 0.3701 - val_loss: 0.0057 - val_mae: 0.0939
Epoch 2/500
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - loss: 0.0066 - mae: 0.0942 - val_loss: 0.0033 - val_mae: 0.0685
Epoch 3/500
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step - loss: 0.0034 - mae: 0.0623 - val_loss: 0.0028 - val_mae: 0.0634
Epoch 4/500
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - loss: 0.0031 - mae: 0.0589 - val_loss: 0.0023 - val_mae: 0.0552
Epoch 5/500
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - loss: 0.0029 - mae: 0.0569 - val_loss: 0.0028 - val_mae: 0.0641
Epoch 6/500
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - loss: 0.0031 - mae: 0.0584 - val_loss: 0.0023 - val_mae: 0.0564
Epoch 7/500
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - loss: 0.0026

KeyboardInterrupt: 

In [None]:
# Validation MAE computed in the training cell (already rescaled by 1e6).
# A bare last expression lets Jupyter render it, rather than going through print().
mae

In [5]:
import torch
import torch.nn as nn

class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear head applied to the last time step."""

    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout=0.0):
        """
        Initializes the LSTM model.

        Parameters:
            input_size (int): Number of features in the input.
            hidden_size (int): Number of features in the hidden state.
            num_layers (int): Number of LSTM layers.
            output_size (int): Number of outputs (e.g., number of classes or regression target size).
            dropout (float): Dropout rate between LSTM layers (applied if num_layers > 1).
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # Define the LSTM layer(s). Dropout is forced to 0 for a single layer
        # because it is only applied between stacked layers.
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0
        )

        # Define the output layer that maps the last hidden state to the desired output size.
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """
        Forward pass through the network.

        Parameters:
            x (Tensor): Input of shape (batch_size, sequence_length, input_size),
                or (sequence_length, input_size) for a single unbatched sequence.

        Returns:
            out (Tensor): Predictions computed from the final time step, of shape
                (batch_size, output_size), or (output_size,) for unbatched input.
        """
        # No explicit (h0, c0): nn.LSTM starts from zero states by default and
        # creates them with the input's device and dtype. Hand-built float32
        # zeros broke non-float32 inputs and assumed a leading batch dimension.
        out, _ = self.lstm(x)

        # Use the last time step's output for prediction. The ellipsis keeps
        # this valid for both batched (N, L, H) and unbatched (L, H) outputs.
        last_time_step = out[..., -1, :]
        return self.fc(last_time_step)

# Smoke test: build a small model and push one random batch through it.
if __name__ == "__main__":
    # Hyperparameters: batch size, time steps per sequence, and features per
    # time step (1 = univariate series).
    batch_size, sequence_length, input_size = 32, 10, 1
    # LSTM width and depth, and the size of the prediction
    # (1 for regression, or the number of classes for classification).
    hidden_size, num_layers, output_size = 50, 2, 1

    # Instantiate the network.
    model = LSTMModel(
        input_size=input_size,
        hidden_size=hidden_size,
        num_layers=num_layers,
        output_size=output_size,
        dropout=0.2,
    )

    # A random batch shaped (batch_size, sequence_length, input_size).
    example_input = torch.randn((batch_size, sequence_length, input_size))

    # Run the forward pass; the result should be (batch_size, output_size).
    output = model(example_input)
    print(f"Output shape: {output.shape}")


Output shape: torch.Size([32, 1])
