In [1]:
import tensorflow as tf
import pandas as pd

# Prerequisites (defined earlier in the notebook, not in this cell):
#   df         – DataFrame with "bus", "rail" and "day_type" columns whose index
#                accepts partial date strings such as "2016-01" in slices
#                (presumably a sorted DatetimeIndex – confirm against the loader)
#   seq_length – number of consecutive time steps in each input window
df_mulvar = df[["bus", "rail"]] / 1e6  # use both bus & rail series as input
df_mulvar["next_day_type"] = df["day_type"].shift(-1)  # we know tomorrow's type
# One-hot encode the day type. dtype=float keeps every column numeric: with
# pandas >= 2.0 the default dummy dtype is bool, and mixing bool with float
# columns makes to_numpy() return an object array that TensorFlow cannot
# convert to a tensor.
df_mulvar = pd.get_dummies(df_mulvar, dtype=float)

# Look the target column up by name instead of relying on its position.
rail_col = df_mulvar.columns.get_loc("rail")

# Split the dataset
mulvar_train = df_mulvar["2016-01":"2018-12"]
mulvar_valid = df_mulvar["2019-01":"2019-05"]
mulvar_test = df_mulvar["2019-06":]

tf.random.set_seed(42)  # extra code – ensures reproducibility

# Convert dataframes to numpy arrays
train_data = mulvar_train.to_numpy()
valid_data = mulvar_valid.to_numpy()

# Create training dataset: each window of seq_length rows is paired with the
# rail value of the row that immediately follows the window.
train_mulvar_ds = tf.keras.utils.timeseries_dataset_from_array(
    train_data,
    targets=train_data[seq_length:, rail_col],  # forecast only the rail series
    sequence_length=seq_length,
    batch_size=32,
    shuffle=True,
    seed=42
)

# Create validation dataset (same windowing, left unshuffled)
valid_mulvar_ds = tf.keras.utils.timeseries_dataset_from_array(
    valid_data,
    targets=valid_data[seq_length:, rail_col],  # forecast only the rail series
    sequence_length=seq_length,
    batch_size=32
)

# Define the model
tf.random.set_seed(42)  # extra code – ensures reproducibility
# Take the feature count from the training array (bus, rail and the one-hot
# day-type columns) so the input layer always matches the data.
n_features = train_data.shape[-1]
mulvar_model = tf.keras.Sequential([
    tf.keras.layers.SimpleRNN(32, input_shape=[seq_length, n_features]),
    tf.keras.layers.Dense(1)  # single output: the next rail value
])

# Compile, fit, and evaluate the model
def fit_and_evaluate(model, train_set, valid_set, learning_rate=0.05, epochs=20):
    """Compile and train `model`, then return its validation MAE times 1e6.

    Args:
        model: Keras model to compile and fit; it is modified in place.
        train_set: Dataset passed to `model.fit` for training.
        valid_set: Dataset used both as validation data during training and
            for the final evaluation.
        learning_rate: Learning rate of the SGD optimizer (momentum is 0.9).
        epochs: Maximum number of training epochs.

    Returns:
        The "mae" metric measured on `valid_set`, multiplied by 1e6
        (presumably to undo a 1e6 down-scaling of the data – confirm
        against the caller).
    """
    # Stop once the monitored quantity (Keras default) has not improved for
    # 5 epochs, and roll the model back to its best weights.
    stopper = tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)
    sgd = tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=0.9)

    model.compile(loss=tf.keras.losses.Huber(), optimizer=sgd, metrics=["mae"])
    model.fit(
        train_set,
        validation_data=valid_set,
        epochs=epochs,
        callbacks=[stopper],
    )

    # evaluate() yields the loss followed by the compiled metric.
    _, mae = model.evaluate(valid_set)
    return mae * 1e6

# Train for at most 500 epochs; left as a bare expression so the notebook
# displays the returned validation MAE.
fit_and_evaluate(
    mulvar_model,
    train_mulvar_ds,
    valid_mulvar_ds,
    learning_rate=0.05,
    epochs=500,
)


NameError: name 'df' is not defined