In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, LSTM, Dense, Bidirectional, Conv1D, MaxPooling1D, Flatten, Concatenate
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
import tensorflow.keras as keras

In [None]:
X_train = pd.read_csv("./data/X_train_lstm.csv")
X_test = pd.read_csv("./data/X_test_lstm.csv")

Y_train = pd.read_csv("./data/Y_train_lstm.csv")
Y_test = pd.read_csv("./data/Y_test_lstm.csv")

### Set random seeds

In [3]:
np.random.seed(42)
tf.random.set_seed(42)
keras.utils.set_random_seed(42)

### Train LSTM

In [None]:
# Defining callbacks
checkpoint = ModelCheckpoint("./models/lstm_model.keras", monitor='val_loss', save_best_only=True, verbose=1)
early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1)

# Define LSTM model
# Up to 2 layers of LSTM and number of hidden units were hand tuned to determine this as the optimum model
lstm_model = Sequential([
    Input(shape=(X_train.shape[1], 1)),
    LSTM(units=64, activation='relu', recurrent_dropout=0.2),
    Dense(5)
])

# Use MSE for loss because we want to emphasize the "wrongest" guesses the most. MAE is an interpretable metric
lstm_model.compile(optimizer=Adam(learning_rate=1e-3), loss='mse', metrics=['mae'])

# Train model w/ early stopping
# Batch size is the average number of flights per day
history = lstm_model.fit(X_train, Y_train, epochs=50, batch_size=265, validation_split=0.2, callbacks=[checkpoint, early_stopping])


loss, mae = lstm_model.evaluate(X_test, Y_test)
print("Test Mean Absolute Error:", mae)

Y_pred = lstm_model.predict(X_test)

mae_columns = mean_absolute_error(Y_test, Y_pred, multioutput='raw_values')
print("Mean Absolute Error for each column:")
print(mae_columns)

Epoch 1/50
Epoch 1: val_loss improved from inf to 378.16812, saving model to ../models\lstm_model.keras
Epoch 2/50
Epoch 2: val_loss improved from 378.16812 to 374.56149, saving model to ../models\lstm_model.keras
Epoch 3/50
Epoch 3: val_loss improved from 374.56149 to 372.89502, saving model to ../models\lstm_model.keras
Epoch 4/50
Epoch 4: val_loss improved from 372.89502 to 372.48251, saving model to ../models\lstm_model.keras
Epoch 5/50
Epoch 5: val_loss improved from 372.48251 to 369.26303, saving model to ../models\lstm_model.keras
Epoch 6/50
Epoch 6: val_loss did not improve from 369.26303
Epoch 7/50
Epoch 7: val_loss improved from 369.26303 to 368.16092, saving model to ../models\lstm_model.keras
Epoch 8/50
Epoch 8: val_loss improved from 368.16092 to 368.10052, saving model to ../models\lstm_model.keras
Epoch 9/50
Epoch 9: val_loss improved from 368.10052 to 367.97589, saving model to ../models\lstm_model.keras
Epoch 10/50
Epoch 10: val_loss improved from 367.97589 to 367.6136

### Train BiLSTM

In [None]:
# Defining callbacks
checkpoint = ModelCheckpoint("./models/bilstm_model.keras", monitor='val_loss', save_best_only=True, verbose=1)
early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1)

# Define LSTM model
# Up to 2 layers of LSTM and number of hidden units were hand tuned to determine this as the optimum model
bilstm_model = Sequential([
    Input(shape=(X_train.shape[1], 1)),
    Bidirectional(
        LSTM(units=64, activation='relu', recurrent_dropout=0.2)
    ),
    Dense(5)
])

# Use MSE for loss because we want to emphasize the "wrongest" guesses the most. MAE is an interpretable metric
bilstm_model.compile(optimizer=Adam(learning_rate=1e-3), loss='mse', metrics=['mae'])

# Train model w/ early stopping
# Batch size is the average number of flights per day
history = bilstm_model.fit(X_train, Y_train, epochs=50, batch_size=265, validation_split=0.2, callbacks=[checkpoint, early_stopping])


loss, mae = bilstm_model.evaluate(X_test, Y_test)
print("Test Mean Absolute Error:", mae)

Y_pred = bilstm_model.predict(X_test)

mae_columns = mean_absolute_error(Y_test, Y_pred, multioutput='raw_values')
print("Mean Absolute Error for each column:")
print(mae_columns)

Epoch 1/50
Epoch 1: val_loss improved from inf to 378.54678, saving model to ../models\bilstm_model.keras
Epoch 2/50
Epoch 2: val_loss improved from 378.54678 to 374.90466, saving model to ../models\bilstm_model.keras
Epoch 3/50
Epoch 3: val_loss improved from 374.90466 to 374.22491, saving model to ../models\bilstm_model.keras
Epoch 4/50
Epoch 4: val_loss did not improve from 374.22491
Epoch 5/50
Epoch 5: val_loss improved from 374.22491 to 371.45572, saving model to ../models\bilstm_model.keras
Epoch 6/50
Epoch 6: val_loss did not improve from 371.45572
Epoch 7/50
Epoch 7: val_loss improved from 371.45572 to 369.03610, saving model to ../models\bilstm_model.keras
Epoch 8/50
Epoch 8: val_loss improved from 369.03610 to 368.71072, saving model to ../models\bilstm_model.keras
Epoch 9/50
Epoch 9: val_loss did not improve from 368.71072
Epoch 10/50
Epoch 10: val_loss did not improve from 368.71072
Epoch 11/50
Epoch 11: val_loss did not improve from 368.71072
Epoch 12/50
Epoch 12: val_loss

### Train CNN + LSTM Hybrid

In [None]:
checkpoint = ModelCheckpoint("./models/hybrid_model.keras", monitor='val_loss', save_best_only=True, verbose=1)
early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1)

# Define input layer
input_layer = Input(shape=(X_train.shape[1], 1))

# CNN model
conv_layer = Conv1D(filters=32, kernel_size=3, activation='relu')(input_layer)
maxpool_layer = MaxPooling1D(pool_size=2)(conv_layer)
flatten_layer = Flatten()(maxpool_layer)
dense_cnn = Dense(32, activation='relu')(flatten_layer)

# BiLSTM model
lstm_layer = LSTM(64, activation='relu')(input_layer)
# lstm_layer2 = LSTM(32, activation='relu', return_sequences=False)(lstm_layer)
dense_lstm = Dense(32, activation='relu')(lstm_layer)

# Concatenate CNN and BiLSTM outputs
concatenated = Concatenate()([dense_cnn, dense_lstm])

# Output layer
output_layer = Dense(5)(concatenated)

# Create the ensemble model
hybrid_model = Model(inputs=input_layer, outputs=output_layer)

hybrid_model.compile(optimizer='adam', loss='mse', metrics=['mae'])

history = hybrid_model.fit(
    X_train,
    Y_train,
    epochs=50,
    batch_size=512,
    validation_split=0.2,
    callbacks=[checkpoint, early_stopping]
)

loss, mae = hybrid_model.evaluate(X_test, Y_test)
print("Test Mean Absolute Error:", mae)

Y_pred = hybrid_model.predict(X_test)

mae_columns = mean_absolute_error(Y_test, Y_pred, multioutput='raw_values')
print("Mean Absolute Error for each column:")
print(mae_columns)



Epoch 1/50
Epoch 1: val_loss improved from inf to 387.31973, saving model to ../models\hybrid_model.keras
Epoch 2/50
Epoch 2: val_loss improved from 387.31973 to 384.78098, saving model to ../models\hybrid_model.keras
Epoch 3/50
Epoch 3: val_loss did not improve from 384.78098
Epoch 4/50
Epoch 4: val_loss improved from 384.78098 to 377.85553, saving model to ../models\hybrid_model.keras
Epoch 5/50
Epoch 5: val_loss did not improve from 377.85553
Epoch 6/50
Epoch 6: val_loss did not improve from 377.85553
Epoch 7/50
Epoch 7: val_loss did not improve from 377.85553
Epoch 8/50
Epoch 8: val_loss did not improve from 377.85553
Epoch 9/50
Epoch 9: val_loss did not improve from 377.85553
Epoch 9: early stopping
Test Mean Absolute Error: 10.67187786102295
Mean Absolute Error for each column:
[19.45525618  3.15689241  0.8698911  11.05302003 18.82440156]
