In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, LSTM, Dense, Bidirectional, Conv1D, MaxPooling1D, Flatten, Concatenate
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
import tensorflow.keras as keras

In [2]:
# Read the pre-split feature frames (train first, then test) from CSV.
X_train, X_test = (
    pd.read_csv(csv_path)
    for csv_path in ("./data/X_train_lstm.csv", "./data/X_test_lstm.csv")
)

# Read the matching target frames in the same train/test order.
Y_train, Y_test = (
    pd.read_csv(csv_path)
    for csv_path in ("./data/Y_train_lstm.csv", "./data/Y_test_lstm.csv")
)

### Set random seeds

In [3]:
# Seed NumPy, TensorFlow and Keras with the same fixed value.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)
keras.utils.set_random_seed(SEED)

### Train LSTM

In [4]:
# Callbacks: write the lowest-val_loss model to disk, and stop training once
# val_loss has gone 5 epochs without improving.
checkpoint = ModelCheckpoint("./models/lstm_model.keras", monitor='val_loss', save_best_only=True, verbose=1)
# restore_best_weights=True rolls the in-memory model back to its best val_loss
# epoch. Without it, the evaluation below would score the final-epoch weights
# (up to `patience` epochs past the best), which is not the model saved above.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True, verbose=1)

# Define LSTM model
# Up to 2 layers of LSTM and number of hidden units were hand tuned to determine this as the optimum model
lstm_model = Sequential([
    # Every column of X_train is fed as one timestep carrying a single feature.
    Input(shape=(X_train.shape[1], 1)),
    LSTM(units=64, activation='relu', recurrent_dropout=0.2),
    # 5 regression outputs -- presumably one per Y_train column (TODO confirm).
    Dense(5)
])

# Use MSE for loss because we want to emphasize the "wrongest" guesses the most. MAE is an interpretable metric
lstm_model.compile(optimizer=Adam(learning_rate=1e-3), loss='mse', metrics=['mae'])

# Train model w/ early stopping; the last 20% of the training rows are held out for validation.
# Batch size is the average number of flights per day
history = lstm_model.fit(X_train, Y_train, epochs=50, batch_size=265, validation_split=0.2, callbacks=[checkpoint, early_stopping])

# Score the restored best-epoch weights on the held-out test set.
loss, mae = lstm_model.evaluate(X_test, Y_test)
print("Test Mean Absolute Error:", mae)

Y_pred = lstm_model.predict(X_test)

# multioutput='raw_values' returns one MAE per target column instead of their average.
mae_columns = mean_absolute_error(Y_test, Y_pred, multioutput='raw_values')
print("Mean Absolute Error for each column:")
print(mae_columns)

Epoch 1/50
[1m2873/2876[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 13ms/step - loss: 1912.3690 - mae: 21.0149
Epoch 1: val_loss improved from inf to 474.74130, saving model to ./models/lstm_model.keras
[1m2876/2876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 14ms/step - loss: 1911.0349 - mae: 21.0067 - val_loss: 474.7413 - val_mae: 10.9901
Epoch 2/50
[1m2876/2876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 495.1689 - mae: 11.0249
Epoch 2: val_loss did not improve from 474.74130
[1m2876/2876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 14ms/step - loss: 495.1945 - mae: 11.0251 - val_loss: 494.6069 - val_mae: 10.4383
Epoch 3/50
[1m2876/2876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 614.9430 - mae: 11.4525
Epoch 3: val_loss improved from 474.74130 to 452.43716, saving model to ./models/lstm_model.keras
[1m2876/2876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 12ms/step - loss: 614.924

### Train BiLSTM

In [5]:
# Callbacks: write the lowest-val_loss model to disk, and stop training once
# val_loss has gone 5 epochs without improving.
checkpoint = ModelCheckpoint("./models/bilstm_model.keras", monitor='val_loss', save_best_only=True, verbose=1)
# restore_best_weights=True rolls the in-memory model back to its best val_loss
# epoch. Without it, the evaluation below would score the final-epoch weights
# (up to `patience` epochs past the best), which is not the model saved above.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True, verbose=1)

# Define BiLSTM model
# Up to 2 layers of LSTM and number of hidden units were hand tuned to determine this as the optimum model
bilstm_model = Sequential([
    # Every column of X_train is fed as one timestep carrying a single feature.
    Input(shape=(X_train.shape[1], 1)),
    # The wrapper runs the LSTM over the sequence in both directions.
    Bidirectional(
        LSTM(units=64, activation='relu', recurrent_dropout=0.2)
    ),
    # 5 regression outputs -- presumably one per Y_train column (TODO confirm).
    Dense(5)
])

# Use MSE for loss because we want to emphasize the "wrongest" guesses the most. MAE is an interpretable metric
bilstm_model.compile(optimizer=Adam(learning_rate=1e-3), loss='mse', metrics=['mae'])

# Train model w/ early stopping; the last 20% of the training rows are held out for validation.
# Batch size is the average number of flights per day
history = bilstm_model.fit(X_train, Y_train, epochs=50, batch_size=265, validation_split=0.2, callbacks=[checkpoint, early_stopping])

# Score the restored best-epoch weights on the held-out test set.
loss, mae = bilstm_model.evaluate(X_test, Y_test)
print("Test Mean Absolute Error:", mae)

Y_pred = bilstm_model.predict(X_test)

# multioutput='raw_values' returns one MAE per target column instead of their average.
mae_columns = mean_absolute_error(Y_test, Y_pred, multioutput='raw_values')
print("Mean Absolute Error for each column:")
print(mae_columns)

Epoch 1/50
[1m2874/2876[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 17ms/step - loss: 1821.3560 - mae: 20.9371
Epoch 1: val_loss improved from inf to 395.48599, saving model to ./models/bilstm_model.keras
[1m2876/2876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 18ms/step - loss: 1820.2422 - mae: 20.9299 - val_loss: 395.4860 - val_mae: 11.1859
Epoch 2/50
[1m2875/2876[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 16ms/step - loss: 378.2658 - mae: 10.5549
Epoch 2: val_loss improved from 395.48599 to 389.07269, saving model to ./models/bilstm_model.keras
[1m2876/2876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 17ms/step - loss: 378.2642 - mae: 10.5549 - val_loss: 389.0727 - val_mae: 10.6232
Epoch 3/50
[1m2875/2876[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 15ms/step - loss: 384.2144 - mae: 10.5152
Epoch 3: val_loss improved from 389.07269 to 387.34839, saving model to ./models/bilstm_model.keras
[1m2876/2876[0m [32m━━━━━━━━━━━━━━━━

### Train CNN + LSTM Hybrid

In [6]:
# Callbacks: write the lowest-val_loss model to disk, and stop training once
# val_loss has gone 5 epochs without improving.
checkpoint = ModelCheckpoint("./models/hybrid_model.keras", monitor='val_loss', save_best_only=True, verbose=1)
# restore_best_weights=True rolls the in-memory model back to its best val_loss
# epoch. Without it, the evaluation below would score the final-epoch weights
# (up to `patience` epochs past the best), which is not the model saved above.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True, verbose=1)

# Shared input: every column of X_train is one timestep carrying a single feature.
input_layer = Input(shape=(X_train.shape[1], 1))

# CNN branch: 1-D convolution -> max pooling -> flatten -> 32-unit dense layer
conv_layer = Conv1D(filters=32, kernel_size=3, activation='relu')(input_layer)
maxpool_layer = MaxPooling1D(pool_size=2)(conv_layer)
flatten_layer = Flatten()(maxpool_layer)
dense_cnn = Dense(32, activation='relu')(flatten_layer)

# LSTM branch (single unidirectional layer) -> 32-unit dense layer
lstm_layer = LSTM(64, activation='relu')(input_layer)
dense_lstm = Dense(32, activation='relu')(lstm_layer)

# Concatenate the CNN and LSTM branch outputs
concatenated = Concatenate()([dense_cnn, dense_lstm])

# Output layer: 5 regression outputs -- presumably one per Y_train column (TODO confirm).
output_layer = Dense(5)(concatenated)

# Create the hybrid model: both branches read the same input and are trained jointly
hybrid_model = Model(inputs=input_layer, outputs=output_layer)

# Same optimizer settings as the other models in this notebook (1e-3 is also Adam's default).
hybrid_model.compile(optimizer=Adam(learning_rate=1e-3), loss='mse', metrics=['mae'])

# Train w/ early stopping; the last 20% of the training rows are held out for validation.
# NOTE(review): batch_size is 512 here but 265 for the LSTM/BiLSTM models -- confirm this is intentional.
history = hybrid_model.fit(
    X_train,
    Y_train,
    epochs=50,
    batch_size=512,
    validation_split=0.2,
    callbacks=[checkpoint, early_stopping]
)

# Score the restored best-epoch weights on the held-out test set.
loss, mae = hybrid_model.evaluate(X_test, Y_test)
print("Test Mean Absolute Error:", mae)

Y_pred = hybrid_model.predict(X_test)

# multioutput='raw_values' returns one MAE per target column instead of their average.
mae_columns = mean_absolute_error(Y_test, Y_pred, multioutput='raw_values')
print("Mean Absolute Error for each column:")
print(mae_columns)

Epoch 1/50
[1m1487/1489[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - loss: 551.3322 - mae: 12.9745
Epoch 1: val_loss improved from inf to 394.40552, saving model to ./models/hybrid_model.keras
[1m1489/1489[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 23ms/step - loss: 551.0410 - mae: 12.9706 - val_loss: 394.4055 - val_mae: 10.6622
Epoch 2/50
[1m1487/1489[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - loss: 367.5835 - mae: 10.2510
Epoch 2: val_loss improved from 394.40552 to 387.63156, saving model to ./models/hybrid_model.keras
[1m1489/1489[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 22ms/step - loss: 367.5815 - mae: 10.2510 - val_loss: 387.6316 - val_mae: 10.6408
Epoch 3/50
[1m1488/1489[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - loss: 362.9244 - mae: 10.1256
Epoch 3: val_loss did not improve from 387.63156
[1m1489/1489[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 22ms/step - loss: 362.9