In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, LSTM, Dense, Bidirectional, Conv1D, MaxPooling1D, Flatten, Concatenate, TimeDistributed, Reshape, Activation, Lambda, Layer
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import register_keras_serializable
import tensorflow as tf
import tensorflow.keras as keras

In [2]:
# `_aug` dataset (presumably the augmented variant — confirm against the data prep step).
# Files are read in the same order as before: X train, X test, Y train, Y test.
X_train_aug, X_test_aug = (
    pd.read_csv(csv_path)
    for csv_path in ("./data/X_train_lstm.csv", "./data/X_test_lstm.csv")
)
Y_train_aug, Y_test_aug = (
    pd.read_csv(csv_path)
    for csv_path in ("./data/Y_train_lstm.csv", "./data/Y_test_lstm.csv")
)

# Only using total delay: the list selector keeps ARR_DELAY as a one-column DataFrame
# (not a Series), so the targets stay 2-D.
Y_train_aug = Y_train_aug.loc[:, ['ARR_DELAY']]
Y_test_aug = Y_test_aug.loc[:, ['ARR_DELAY']]

In [3]:
# Copies stored under ./data/old_data/ (presumably the non-augmented baseline — confirm).
# Files are read in the same order as before: X train, X test, Y train, Y test.
X_train, X_test = (
    pd.read_csv(csv_path)
    for csv_path in ("./data/old_data/X_train_lstm.csv", "./data/old_data/X_test_lstm.csv")
)
Y_train, Y_test = (
    pd.read_csv(csv_path)
    for csv_path in ("./data/old_data/Y_train_lstm.csv", "./data/old_data/Y_test_lstm.csv")
)

# Only using total delay: the list selector keeps ARR_DELAY as a one-column DataFrame
# (not a Series), so the targets stay 2-D.
Y_train = Y_train.loc[:, ['ARR_DELAY']]
Y_test = Y_test.loc[:, ['ARR_DELAY']]


### Set random seeds

In [4]:
# One seed for every RNG, so the whole notebook can be re-seeded from a single place.
SEED = 42

np.random.seed(SEED)               # NumPy global RNG
tf.random.set_seed(SEED)           # TensorFlow global seed
keras.utils.set_random_seed(SEED)  # Keras helper; kept last, as in the original order

### Train LSTM

In [5]:
# Defining callbacks
# The checkpoint writes the full model (not just the weights) each time val_loss improves.
checkpoint_aug = ModelCheckpoint("./models/lstm_model_aug.keras", monitor='val_loss', save_best_only=True, verbose=1, save_weights_only=False)
# Stop after 5 epochs without a val_loss improvement. restore_best_weights=True puts the
# best-val_loss weights back into the in-memory model, so the test metrics below describe
# the same epoch the checkpoint saved rather than the last (worse) epoch. This matches the
# EarlyStopping configuration used by the GNN cells further down.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1, restore_best_weights=True)

# Define LSTM model
# One LSTM layer with 64 units followed by a single-output Dense head. The original note
# says the layer count and number of hidden units were hand tuned.
# Each feature column is fed as one timestep with a single channel: shape (n_features, 1).
# NOTE(review): the recorded training loss spikes between epochs (see the log below) while
# val_loss keeps improving; relu inside the LSTM may be contributing — worth confirming.
lstm_model_aug = Sequential([
    Input(shape=(X_train_aug.shape[1], 1)),
    LSTM(units=64, activation='relu', recurrent_dropout=0.2),
    Dense(1)
])

# Use MSE for loss because we want to emphasize the "wrongest" guesses the most. MAE is an interpretable metric
lstm_model_aug.compile(optimizer=Adam(learning_rate=1e-3), loss='mse', metrics=['mae'])

# Train model w/ early stopping; the last 20% of the training rows are held out for validation
history_aug = lstm_model_aug.fit(X_train_aug, Y_train_aug, epochs=50, batch_size=265, validation_split=0.2, callbacks=[checkpoint_aug, early_stopping])


# Test-set metrics, computed on the restored best weights
loss, mae = lstm_model_aug.evaluate(X_test_aug, Y_test_aug)
print("Test Mean Absolute Error:", mae)

Y_pred_aug = lstm_model_aug.predict(X_test_aug)

# Cross-check with scikit-learn. The target has a single column (ARR_DELAY), so this is
# one scalar MAE despite the "each column" label.
mae_columns = mean_absolute_error(Y_test_aug, Y_pred_aug)
print("Mean Absolute Error for each column:")
print(mae_columns)

# Per-epoch training/validation history (loss, mae, val_loss, val_mae)
pd.DataFrame(history_aug.history).to_csv("./models/results/lstm_test_aug.csv")

Epoch 1/50
[1m10883/10883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 1099.3448 - mae: 19.4959
Epoch 1: val_loss improved from inf to 530.66980, saving model to ./models/lstm_model_aug.keras
[1m10883/10883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m154s[0m 14ms/step - loss: 1099.3519 - mae: 19.4958 - val_loss: 530.6698 - val_mae: 16.2410
Epoch 2/50
[1m10880/10883[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 12ms/step - loss: 1238.8148 - mae: 19.0135
Epoch 2: val_loss improved from 530.66980 to 495.81778, saving model to ./models/lstm_model_aug.keras
[1m10883/10883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 13ms/step - loss: 1238.7844 - mae: 19.0134 - val_loss: 495.8178 - val_mae: 16.5178
Epoch 3/50
[1m10880/10883[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 13ms/step - loss: 5637.0264 - mae: 21.4801
Epoch 3: val_loss improved from 495.81778 to 480.63504, saving model to ./models/lstm_model_aug.keras
[1m10883/10883[0

In [6]:
# Defining callbacks
# The checkpoint writes the full model (not just the weights) each time val_loss improves.
checkpoint = ModelCheckpoint("./models/lstm_model.keras", monitor='val_loss', save_best_only=True, verbose=1, save_weights_only=False)
# Stop after 5 epochs without a val_loss improvement. restore_best_weights=True puts the
# best-val_loss weights back into the in-memory model, so the test metrics below describe
# the same epoch the checkpoint saved rather than the last (worse) epoch. This matches the
# EarlyStopping configuration used by the GNN cells further down.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1, restore_best_weights=True)

# Define LSTM model
# Up to 2 layers of LSTM and number of hidden units were hand tuned to determine this as the optimum model
# (one 64-unit LSTM layer). Each feature column is fed as one timestep with a single channel.
lstm_model = Sequential([
    Input(shape=(X_train.shape[1], 1)),
    LSTM(units=64, activation='relu', recurrent_dropout=0.2),
    Dense(1)
])

# Use MSE for loss because we want to emphasize the "wrongest" guesses the most. MAE is an interpretable metric
lstm_model.compile(optimizer=Adam(learning_rate=1e-3), loss='mse', metrics=['mae'])

# Train model w/ early stopping; the last 20% of the training rows are held out for validation
# Batch size is the average number of flights per day
history = lstm_model.fit(X_train, Y_train, epochs=50, batch_size=265, validation_split=0.2, callbacks=[checkpoint, early_stopping])


# Test-set metrics, computed on the restored best weights
loss, mae = lstm_model.evaluate(X_test, Y_test)
print("Test Mean Absolute Error:", mae)

Y_pred = lstm_model.predict(X_test)

# Cross-check with scikit-learn. The target has a single column (ARR_DELAY), so this is
# one scalar MAE despite the "each column" label.
mae_columns = mean_absolute_error(Y_test, Y_pred)
print("Mean Absolute Error for each column:")
print(mae_columns)

# Per-epoch training/validation history (loss, mae, val_loss, val_mae)
pd.DataFrame(history.history).to_csv("./models/results/lstm_test.csv")

Epoch 1/50
[1m10881/10883[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 6ms/step - loss: 447.4342 - mae: 15.9438
Epoch 1: val_loss improved from inf to 452.75455, saving model to ./models/lstm_model.keras
[1m10883/10883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 7ms/step - loss: 447.4272 - mae: 15.9437 - val_loss: 452.7545 - val_mae: 16.0383
Epoch 2/50
[1m10877/10883[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 6ms/step - loss: 408.8911 - mae: 15.3260
Epoch 2: val_loss improved from 452.75455 to 448.77139, saving model to ./models/lstm_model.keras
[1m10883/10883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 6ms/step - loss: 408.8907 - mae: 15.3260 - val_loss: 448.7714 - val_mae: 16.1321
Epoch 3/50
[1m10878/10883[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 6ms/step - loss: 406.0383 - mae: 15.2436
Epoch 3: val_loss improved from 448.77139 to 447.69293, saving model to ./models/lstm_model.keras
[1m10883/10883[0m [32m━━━━━━━━━━━━━━━━━

### Train BiLSTM

In [7]:
# Defining callbacks
# The checkpoint writes the full model (not just the weights) each time val_loss improves.
checkpoint_aug = ModelCheckpoint("./models/bilstm_model_aug.keras", monitor='val_loss', save_best_only=True, verbose=1, save_weights_only=False)
# Stop after 5 epochs without a val_loss improvement. restore_best_weights=True puts the
# best-val_loss weights back into the in-memory model, so the test metrics below describe
# the same epoch the checkpoint saved rather than the last (worse) epoch. This matches the
# EarlyStopping configuration used by the GNN cells further down.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1, restore_best_weights=True)

# Define BiLSTM model
# Up to 2 layers of LSTM and number of hidden units were hand tuned to determine this as the optimum model
# (one bidirectional 64-unit LSTM layer).
# The input width comes from X_train_aug, the frame this model is actually trained on;
# it was previously taken from X_train, which only works if both frames have the same
# number of columns.
bilstm_model_aug = Sequential([
    Input(shape=(X_train_aug.shape[1], 1)),
    Bidirectional(
        LSTM(units=64, activation='relu', recurrent_dropout=0.2)
    ),
    Dense(1)
])

# Use MSE for loss because we want to emphasize the "wrongest" guesses the most. MAE is an interpretable metric
bilstm_model_aug.compile(optimizer=Adam(learning_rate=1e-3), loss='mse', metrics=['mae'])

# Train model w/ early stopping; the last 20% of the training rows are held out for validation
# Batch size is the average number of flights per day
history_aug = bilstm_model_aug.fit(X_train_aug, Y_train_aug, epochs=50, batch_size=265, validation_split=0.2, callbacks=[checkpoint_aug, early_stopping])


# Test-set metrics, computed on the restored best weights
loss, mae = bilstm_model_aug.evaluate(X_test_aug, Y_test_aug)
print("Test Mean Absolute Error:", mae)

Y_pred_aug = bilstm_model_aug.predict(X_test_aug)

# Cross-check with scikit-learn. The target has a single column (ARR_DELAY), so this is
# one scalar MAE despite the "each column" label.
mae_columns = mean_absolute_error(Y_test_aug, Y_pred_aug)
print("Mean Absolute Error for each column:")
print(mae_columns)

# Per-epoch training/validation history (loss, mae, val_loss, val_mae)
pd.DataFrame(history_aug.history).to_csv("./models/results/bilstm_test_aug.csv")

Epoch 1/50
[1m10883/10883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 1162.8428 - mae: 19.1464
Epoch 1: val_loss improved from inf to 445.05237, saving model to ./models/bilstm_model_aug.keras
[1m10883/10883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m194s[0m 18ms/step - loss: 1162.7882 - mae: 19.1462 - val_loss: 445.0524 - val_mae: 15.9454
Epoch 2/50
[1m10881/10883[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 16ms/step - loss: 4869.1328 - mae: 20.0206
Epoch 2: val_loss did not improve from 445.05237
[1m10883/10883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m188s[0m 17ms/step - loss: 4868.9946 - mae: 20.0206 - val_loss: 453.4020 - val_mae: 15.9955
Epoch 3/50
[1m10881/10883[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 16ms/step - loss: 1320.5222 - mae: 18.0874
Epoch 3: val_loss did not improve from 445.05237
[1m10883/10883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m191s[0m 18ms/step - loss: 1320.4877 - mae: 18.0875 - val_l

In [8]:
# Defining callbacks
# The checkpoint writes the full model (not just the weights) each time val_loss improves.
checkpoint = ModelCheckpoint("./models/bilstm_model.keras", monitor='val_loss', save_best_only=True, verbose=1, save_weights_only=False)
# Stop after 5 epochs without a val_loss improvement. restore_best_weights=True puts the
# best-val_loss weights back into the in-memory model, so the test metrics below describe
# the same epoch the checkpoint saved rather than the last (worse) epoch. This matches the
# EarlyStopping configuration used by the GNN cells further down.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1, restore_best_weights=True)

# Define BiLSTM model
# Up to 2 layers of LSTM and number of hidden units were hand tuned to determine this as the optimum model
# (one bidirectional 64-unit LSTM layer). Each feature column is fed as one timestep with a single channel.
bilstm_model = Sequential([
    Input(shape=(X_train.shape[1], 1)),
    Bidirectional(
        LSTM(units=64, activation='relu', recurrent_dropout=0.2)
    ),
    Dense(1)
])

# Use MSE for loss because we want to emphasize the "wrongest" guesses the most. MAE is an interpretable metric
bilstm_model.compile(optimizer=Adam(learning_rate=1e-3), loss='mse', metrics=['mae'])

# Train model w/ early stopping; the last 20% of the training rows are held out for validation
# Batch size is the average number of flights per day
history = bilstm_model.fit(X_train, Y_train, epochs=50, batch_size=265, validation_split=0.2, callbacks=[checkpoint, early_stopping])


# Test-set metrics, computed on the restored best weights
loss, mae = bilstm_model.evaluate(X_test, Y_test)
print("Test Mean Absolute Error:", mae)

Y_pred = bilstm_model.predict(X_test)

# Cross-check with scikit-learn. The target has a single column (ARR_DELAY), so this is
# one scalar MAE despite the "each column" label.
mae_columns = mean_absolute_error(Y_test, Y_pred)
print("Mean Absolute Error for each column:")
print(mae_columns)

# Per-epoch training/validation history (loss, mae, val_loss, val_mae)
pd.DataFrame(history.history).to_csv("./models/results/bilstm_test.csv")

Epoch 1/50
[1m10878/10883[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 9ms/step - loss: 570.9128 - mae: 17.2346
Epoch 1: val_loss improved from inf to 448.05511, saving model to ./models/bilstm_model.keras
[1m10883/10883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m107s[0m 10ms/step - loss: 570.8456 - mae: 17.2339 - val_loss: 448.0551 - val_mae: 16.1207
Epoch 2/50
[1m10880/10883[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 9ms/step - loss: 408.1736 - mae: 15.3034
Epoch 2: val_loss improved from 448.05511 to 446.09976, saving model to ./models/bilstm_model.keras
[1m10883/10883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m106s[0m 10ms/step - loss: 408.1734 - mae: 15.3034 - val_loss: 446.0998 - val_mae: 16.1436
Epoch 3/50
[1m10879/10883[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 9ms/step - loss: 409.7234 - mae: 15.3038
Epoch 3: val_loss did not improve from 446.09976
[1m10883/10883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m105s[0m 10ms/step -

### Train CNN + LSTM Hybrid

In [9]:
# Callbacks
# The checkpoint writes the full model (not just the weights) each time val_loss improves.
checkpoint_aug = ModelCheckpoint("./models/hybrid_model_aug.keras", monitor='val_loss', save_best_only=True, verbose=1, save_weights_only=False)
# Stop after 5 epochs without a val_loss improvement. restore_best_weights=True puts the
# best-val_loss weights back into the in-memory model, so the test metrics below describe
# the same epoch the checkpoint saved rather than the last (worse) epoch. This matches the
# EarlyStopping configuration used by the GNN cells further down.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1, restore_best_weights=True)

# Define input layer: each feature column is one timestep with a single channel
input_layer_aug = Input(shape=(X_train_aug.shape[1], 1))

# CNN branch: Conv1D -> max-pool -> flatten -> 32-unit dense
conv_layer_aug = Conv1D(filters=32, kernel_size=3, activation='relu')(input_layer_aug)
maxpool_layer_aug = MaxPooling1D(pool_size=2)(conv_layer_aug)
flatten_layer_aug = Flatten()(maxpool_layer_aug)
dense_cnn_aug = Dense(32, activation='relu')(flatten_layer_aug)

# LSTM branch (a plain unidirectional LSTM, not a BiLSTM) -> 32-unit dense
lstm_layer_aug = LSTM(64, activation='relu')(input_layer_aug)
dense_lstm_aug = Dense(32, activation='relu')(lstm_layer_aug)

# Concatenate CNN and LSTM branch outputs
concatenated_aug = Concatenate()([dense_cnn_aug, dense_lstm_aug])

# Output layer: single regression value
output_layer_aug = Dense(1)(concatenated_aug)

# Create the two-branch hybrid model (one model with two branches, not an ensemble of separate models)
hybrid_model_aug = Model(inputs=input_layer_aug, outputs=output_layer_aug)

hybrid_model_aug.compile(optimizer='adam', loss='mse', metrics=['mae'])

# The last 20% of the training rows are held out for validation
history_aug = hybrid_model_aug.fit(
    X_train_aug,
    Y_train_aug,
    epochs=50,
    batch_size=512,
    validation_split=0.2,
    callbacks=[checkpoint_aug, early_stopping]
)

# Test-set metrics, computed on the restored best weights
loss, mae = hybrid_model_aug.evaluate(X_test_aug, Y_test_aug)
print("Test Mean Absolute Error:", mae)

Y_pred_aug = hybrid_model_aug.predict(X_test_aug)

# Cross-check with scikit-learn. The target has a single column (ARR_DELAY), so this is
# one scalar MAE despite the "each column" label.
mae_columns = mean_absolute_error(Y_test_aug, Y_pred_aug)
print("Mean Absolute Error for each column:")
print(mae_columns)

# Per-epoch training/validation history (loss, mae, val_loss, val_mae)
pd.DataFrame(history_aug.history).to_csv("./models/results/cnn_lstm_test_aug.csv")

Epoch 1/50
[1m5632/5633[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - loss: 562.8344 - mae: 16.7462
Epoch 1: val_loss improved from inf to 446.44348, saving model to ./models/hybrid_model_aug.keras
[1m5633/5633[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m127s[0m 22ms/step - loss: 562.7859 - mae: 16.7458 - val_loss: 446.4435 - val_mae: 16.1197
Epoch 2/50
[1m5631/5633[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - loss: 402.5555 - mae: 15.1362
Epoch 2: val_loss did not improve from 446.44348
[1m5633/5633[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m125s[0m 22ms/step - loss: 402.5577 - mae: 15.1363 - val_loss: 450.8263 - val_mae: 15.8314
Epoch 3/50
[1m5632/5633[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - loss: 403.4681 - mae: 15.1895
Epoch 3: val_loss did not improve from 446.44348
[1m5633/5633[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m125s[0m 22ms/step - loss: 403.4678 - mae: 15.1895 - val_loss: 447.8056 - va

In [10]:
# Callbacks
# The checkpoint writes the full model (not just the weights) each time val_loss improves.
checkpoint = ModelCheckpoint("./models/hybrid_model.keras", monitor='val_loss', save_best_only=True, verbose=1, save_weights_only=False)
# Stop after 5 epochs without a val_loss improvement. restore_best_weights=True puts the
# best-val_loss weights back into the in-memory model, so the test metrics below describe
# the same epoch the checkpoint saved rather than the last (worse) epoch. This matches the
# EarlyStopping configuration used by the GNN cells further down.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1, restore_best_weights=True)

# Define input layer: each feature column is one timestep with a single channel
input_layer = Input(shape=(X_train.shape[1], 1))

# CNN branch: Conv1D -> max-pool -> flatten -> 32-unit dense
conv_layer = Conv1D(filters=32, kernel_size=3, activation='relu')(input_layer)
maxpool_layer = MaxPooling1D(pool_size=2)(conv_layer)
flatten_layer = Flatten()(maxpool_layer)
dense_cnn = Dense(32, activation='relu')(flatten_layer)

# LSTM branch (a plain unidirectional LSTM, not a BiLSTM) -> 32-unit dense
lstm_layer = LSTM(64, activation='relu')(input_layer)
dense_lstm = Dense(32, activation='relu')(lstm_layer)

# Concatenate CNN and LSTM branch outputs
concatenated = Concatenate()([dense_cnn, dense_lstm])

# Output layer: single regression value
output_layer = Dense(1)(concatenated)

# Create the two-branch hybrid model (one model with two branches, not an ensemble of separate models)
hybrid_model = Model(inputs=input_layer, outputs=output_layer)

hybrid_model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# The last 20% of the training rows are held out for validation
history = hybrid_model.fit(
    X_train,
    Y_train,
    epochs=50,
    batch_size=512,
    validation_split=0.2,
    callbacks=[checkpoint, early_stopping]
)

# Test-set metrics, computed on the restored best weights
loss, mae = hybrid_model.evaluate(X_test, Y_test)
print("Test Mean Absolute Error:", mae)

Y_pred = hybrid_model.predict(X_test)

# Cross-check with scikit-learn. The target has a single column (ARR_DELAY), so this is
# one scalar MAE despite the "each column" label.
mae_columns = mean_absolute_error(Y_test, Y_pred)
print("Mean Absolute Error for each column:")
print(mae_columns)

# Per-epoch training/validation history (loss, mae, val_loss, val_mae)
pd.DataFrame(history.history).to_csv("./models/results/cnn_lstm_test.csv")

Epoch 1/50
[1m5633/5633[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 432.5814 - mae: 15.9155
Epoch 1: val_loss improved from inf to 454.90015, saving model to ./models/hybrid_model.keras
[1m5633/5633[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 14ms/step - loss: 432.5790 - mae: 15.9155 - val_loss: 454.9001 - val_mae: 16.0124
Epoch 2/50
[1m5633/5633[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 410.9279 - mae: 15.4097
Epoch 2: val_loss improved from 454.90015 to 450.82748, saving model to ./models/hybrid_model.keras
[1m5633/5633[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 14ms/step - loss: 410.9276 - mae: 15.4096 - val_loss: 450.8275 - val_mae: 15.9566
Epoch 3/50
[1m5631/5633[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 12ms/step - loss: 406.5569 - mae: 15.2816
Epoch 3: val_loss improved from 450.82748 to 450.23837, saving model to ./models/hybrid_model.keras
[1m5633/5633[0m [32m━━━━━━━━━━━━━━━━━━

### GNN + LSTM Hybrid Model

In [11]:

# ─── CONFIG ────────────────────────────────────────────────────
MASTER_COORD = "./Datasets/T_MASTER_CORD.csv"
K_NEIGHBORS  = 5      # connect each airport to its 5 closest neighbors
EARTH_R      = 6371.0 # km

# ─── 1) load airport coordinates ────────────────────────────────
# Everything is read as text first, then only the two coordinate columns are cast to float.
mc = (
    pd.read_csv(MASTER_COORD, dtype=str)
      .set_index("AIRPORT_SEQ_ID")[["LATITUDE","LONGITUDE"]]
      .astype(float)
)
# list of IDs and arrays of lat/lon in radians
ids  = mc.index.to_list()
lats = np.deg2rad(mc["LATITUDE"].values)
lons = np.deg2rad(mc["LONGITUDE"].values)
N    = len(ids)

# ─── 2) compute pairwise haversine distances ────────────────────
# using the vectorized formula; broadcasting (N,1) against (1,N) yields (N,N) matrices
lat1 = lats[:, None]
lat2 = lats[None, :]
dlon = lons[None, :] - lons[:, None]

dlat = lat2 - lat1
a = np.sin(dlat/2.0)**2 + np.cos(lat1)*np.cos(lat2)*np.sin(dlon/2.0)**2
# Floating-point rounding can push `a` marginally outside [0, 1], which would make
# sqrt/arcsin return NaN. Clip in place to avoid allocating another (N, N) array.
np.clip(a, 0.0, 1.0, out=a)
c = 2 * np.arcsin(np.sqrt(a))
dist_km = EARTH_R * c  # shape (N, N)

# ─── 3) build adjacency by k-nearest neighbors ───────────────────
A = np.zeros((N, N), dtype=np.float32)

for i in range(N):
    # Exclude the airport itself explicitly rather than assuming it sorts to position 0:
    # when several rows share the same coordinates (distance 0), argsort may put another
    # index first, which would keep a self-loop and drop a real neighbor.
    row = dist_km[i].copy()
    row[i] = np.inf
    nearest = np.argsort(row)[:K_NEIGHBORS]
    # Keep only finite distances: this drops the self entry when fewer than K other
    # airports exist, and drops neighbors whose coordinates are missing (NaN sorts last).
    neighbors = nearest[np.isfinite(row[nearest])]
    A[i, neighbors] = 1.0

# symmetrize: if i→j or j→i, keep edge both ways
A = np.maximum(A, A.T)

# ─── 4) (Optional) row-normalize adjacency ──────────────────────
# so each row sums to 1 (rows with no edges are left as all zeros)
row_sums = A.sum(axis=1, keepdims=True)
A_norm   = A / np.where(row_sums>0, row_sums, 1.0)

# Now `A_norm` is your adjacency matrix to feed into the GNN
print("Adjacency matrix shape:", A_norm.shape)



Adjacency matrix shape: (19213, 19213)


In [12]:
@register_keras_serializable(package="Custom", name="SimpleGraphConv")
class SimpleGraphConv(Layer):
    """Linear graph convolution without bias or activation: ``A @ (X @ W)``.

    Args:
        units: Number of output features per node (columns of the kernel).
        **kwargs: Forwarded unchanged to the base ``Layer`` constructor.

    Call inputs:
        A two-element sequence ``[X, A]``. Per the shape notes in the code,
        X is ``(batch, N, F)`` node features and A is ``(batch, N, N)``.

    Returns:
        ``tf.matmul(A, tf.matmul(X, W))``, i.e. ``(batch, N, units)`` for those shapes.
    """

    def __init__(self, units, **kwargs):
        super().__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        # input_shape = [(batch, N, F), (batch, N, N)]
        # The kernel maps the F input features (last dim of X) to `units` outputs.
        F = input_shape[0][-1]
        self.w = self.add_weight(
            shape=(F, self.units),
            initializer="glorot_uniform",
            name="kernel")
        super().build(input_shape)

    def call(self, inputs):
        X, A = inputs    # X: (batch, N, F), A: (batch, N, N)
        # Project features first, then mix the projected rows according to A.
        return tf.matmul(A, tf.matmul(X, self.w))

    def get_config(self):
        """Return the constructor arguments so a saved model can rebuild this layer."""
        # Explicit config so the `units` round-trip does not rely on implicit auto-config.
        config = super().get_config()
        config.update({"units": self.units})
        return config


@register_keras_serializable(package="Custom", name="GCNTimeDistributed")
class GCNTimeDistributed(Layer):
    """Apply one shared ``SimpleGraphConv`` to every time slice of a 4-D input.

    Args:
        units: Output features per node, passed to the inner ``SimpleGraphConv``.
        **kwargs: Forwarded unchanged to the base ``Layer`` constructor.

    Call inputs:
        A two-element sequence ``[X, A]``. Per the shape notes in the code,
        X is ``(batch, T, N, F)`` and A is ``(batch, N, N)``; the same adjacency
        is used for all T steps of a sample.

    Returns:
        A tensor reshaped to ``(batch, T, N, units)``.
    """

    def __init__(self, units, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        # we’ll reuse this conv at each time slice
        self.gcn = SimpleGraphConv(units)

    def build(self, input_shape):
        # input_shape = [(batch, T, N, F), (batch, N, N)]
        # The inner GCN sees time folded into the batch axis, so it is built on the
        # per-slice shapes (None, N, F) and (None, N, N).
        _, T, N, F = input_shape[0]
        # call inner build:
        self.gcn.build([(None, N, F), (None, N, N)])
        super().build(input_shape)

    def call(self, inputs):
        X, A = inputs
        batch = tf.shape(X)[0]
        T     = tf.shape(X)[1]
        N     = tf.shape(X)[2]
        F     = tf.shape(X)[3]

        # 1) collapse time into the batch axis
        Xr = tf.reshape(X, (batch * T, N, F))
        # 2) tile adjacency so each time slice gets its sample's matrix
        Aexp = tf.expand_dims(A, 1)               # (batch, 1, N, N)
        Atil = tf.tile(Aexp, [1, T, 1, 1])         # (batch, T, N, N)
        Ar   = tf.reshape(Atil, (batch * T, N, N))
        # 3) apply GCN
        Yr = self.gcn([Xr, Ar])                    # (batch*T, N, units)
        # 4) restore time axis
        return tf.reshape(Yr, (batch, T, N, self.units))

    def get_config(self):
        """Return the constructor arguments so a saved model can rebuild this layer."""
        # Explicit config so the `units` round-trip does not rely on implicit auto-config;
        # the inner SimpleGraphConv is recreated from `units` in __init__.
        config = super().get_config()
        config.update({"units": self.units})
        return config

In [13]:
from tensorflow.keras.callbacks import ReduceLROnPlateau

# Learning-rate schedule shared by both GNN fits below: multiply the LR by 0.5 after
# 3 epochs without a val_mae improvement, never going below 1e-6.
# Note this callback watches val_mae, whereas the checkpoint/early-stopping callbacks
# in this notebook watch val_loss.
reduce_lr = ReduceLROnPlateau(monitor="val_mae", factor=0.5, patience=3, min_lr=1e-6, verbose=1)

### Build GNN data

In [14]:
# 1) Graph dimensions for a single flight sample.
#    Note: N is rebound here; it held the airport count in the adjacency cell above.
T, N, F = 1, 2, 5   # one snapshot per flight / origin + destination nodes / five weather vars

# Alias of the k-NN adjacency built earlier; the model below is fed a per-sample
# (N, N) matrix through `A_in`.
A_np = A_norm
gcn_units = 32

# Symbolic model inputs
X_in = Input(name="node_features",    shape=(T, N, F))   # dynamic nodal time-series
A_in = Input(name="adjacency_matrix", shape=(N, N))      # can be broadcast

# The first N*F (= 10) columns are taken to be the weather features.
# NOTE(review): this assumes the column order is [node 0's F features, node 1's F features]
# so the reshape below lines up — confirm against the data prep step.
weather_cols = X_train.columns[:N * F]
assert len(weather_cols) == N * F

# ─── C) extract and reshape ───────────────────────────────────────────────────
weather_train = X_train.loc[:, weather_cols].to_numpy()  # shape (n_samples, 10)
weather_test  = X_test.loc[:, weather_cols].to_numpy()

# reshape into (batch, T, N, F)
X_train_gnn = np.reshape(weather_train, (-1, T, N, F))
X_test_gnn  = np.reshape(weather_test,  (-1, T, N, F))

print(X_train_gnn.shape)  # → (n_samples, 1, 2, 5); the recorded run printed (3604675, 1, 2, 5)

(3604675, 1, 2, 5)


In [15]:
# ─── 2a) first graph convolution, applied to every time step
g = GCNTimeDistributed(units=gcn_units, name="time_gcn")([X_in, A_in])

# ─── (optional) second graph convolution stacked on the first
# NOTE(review): the `_aug` variant of this model further down uses a single GCN layer,
# so the two architectures differ — confirm that is intended.
g = GCNTimeDistributed(units=gcn_units, name="time_gcn2")([g, A_in])

# ─── 2b) merge the node and feature axes of each time step, keeping T
g_flat = TimeDistributed(Flatten(), name="flatten_nodes")(g)
# shape = (batch, T, N * gcn_units)

# ─── 2c) LSTM over the now 3-D sequence
h = LSTM(units=64, name="temporal_lstm")(g_flat)

# ─── 2d) output head with one unit per target column of Y_train
Y_dim = len(Y_train.columns)
out = Dense(units=Y_dim, name="output")(h)




In [16]:
# ─── 3) Assemble and compile the model, then define its callbacks ─────────────

gnn_lstm = Model(inputs=[X_in, A_in], outputs=out, name="GNN_LSTM_Hybrid")
gnn_lstm.summary()
gnn_lstm.compile(loss="mse", optimizer="adam", metrics=["mae"])


# Callbacks
# Save the full model (not just the weights) whenever val_loss improves.
checkpoint_cb = ModelCheckpoint(
    "./models/gnn_lstm.keras",
    monitor="val_loss",
    verbose=1,
    save_best_only=True,
    save_weights_only=False,
)
# Stop after 5 epochs without a val_loss improvement and roll the in-memory model
# back to the best epoch's weights.
earlystop_cb = EarlyStopping(
    monitor="val_loss",
    patience=5,
    restore_best_weights=True,
    verbose=1,
)



In [17]:

# Two-node graph, origin ↔ destination: each node is linked only to the other (no self-loops).
A2 = np.array([[0, 1],
               [1, 0]], dtype=np.float32)

# One copy of the 2x2 adjacency per sample: shape (n_samples, 2, 2)
A_train = np.repeat(A2[np.newaxis, ...], len(X_train_gnn), axis=0)
A_test  = np.repeat(A2[np.newaxis, ...], len(X_test_gnn),  axis=0)

print("A_train shape:", A_train.shape)



# Train with the same 80/20 validation split as the LSTM models.
# NOTE(review): in the recorded log below, loss and val_loss are essentially flat from
# epoch 2 onward, which suggests the model is not learning — worth investigating.
history = gnn_lstm.fit(
    x=[X_train_gnn, A_train],
    y=Y_train,
    epochs=50,
    batch_size=32,
    validation_split=0.2,
    callbacks=[checkpoint_cb, earlystop_cb, reduce_lr],
)

# evaluate() returns [loss, mae] for the metrics compiled above
test_results = gnn_lstm.evaluate(
    x=[X_test_gnn, A_test],
    y=Y_test,
    verbose=0,
)
print("Test loss, MAE:", test_results)

A_train shape: (3604675, 2, 2)
Epoch 1/50
[1m90111/90117[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - loss: 448.6422 - mae: 16.1254
Epoch 1: val_loss improved from inf to 486.62286, saving model to ./models/gnn_lstm.keras
[1m90117/90117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m184s[0m 2ms/step - loss: 448.6415 - mae: 16.1254 - val_loss: 486.6229 - val_mae: 16.7691 - learning_rate: 0.0010
Epoch 2/50
[1m90092/90117[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - loss: 438.4288 - mae: 16.1840
Epoch 2: val_loss did not improve from 486.62286
[1m90117/90117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m177s[0m 2ms/step - loss: 438.4288 - mae: 16.1840 - val_loss: 486.6229 - val_mae: 16.7691 - learning_rate: 0.0010
Epoch 3/50
[1m90104/90117[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - loss: 438.4287 - mae: 16.1840
Epoch 3: val_loss improved from 486.62286 to 486.62277, saving model to ./models/gnn_lstm.keras
[1m90117/9011

In [18]:
# Per-epoch training/validation history of the GNN + LSTM fit
pd.DataFrame(data=history.history).to_csv(path_or_buf="./models/results/gnn_test.csv")

# Persist the GNN inputs and targets: train split first, then test.
# Y_train / Y_test are DataFrames; np.save stores them as arrays.
np.save(file="./data/X_train_gnn.npy", arr=X_train_gnn)
np.save(file="./data/A_train.npy",     arr=A_train)
np.save(file="./data/Y_train.npy",     arr=Y_train)

np.save(file="./data/X_test_gnn.npy", arr=X_test_gnn)
np.save(file="./data/A_test.npy",     arr=A_test)
np.save(file="./data/Y_test.npy",     arr=Y_test)

print("Saved GNN training data → ./data/")

Saved GNN training data → ./data/


In [19]:

# Graph dimensions for a single flight sample
T, N, F = 1, 2, 5   # one snapshot per flight / origin + destination nodes / five weather vars


# Alias of the k-NN adjacency built earlier; the model below is fed a per-sample
# (N, N) matrix through `A_in_aug`.
A_np_aug = A_norm
gcn_units = 32

# Symbolic model inputs
X_in_aug = Input(name="node_features",    shape=(T, N, F))   # dynamic nodal time-series
A_in_aug = Input(name="adjacency_matrix", shape=(N, N))      # can be broadcast


# The first N*F (= 10) columns are taken to be the weather features.
# NOTE(review): this assumes the column order is [node 0's F features, node 1's F features]
# so the reshape below lines up — confirm against the data prep step.
weather_cols_aug = X_train_aug.columns[:N * F]
assert len(weather_cols_aug) == N * F

# extract and reshape
weather_train_aug = X_train_aug.loc[:, weather_cols_aug].to_numpy()  # shape (n_samples, 10)
weather_test_aug  = X_test_aug.loc[:, weather_cols_aug].to_numpy()

# reshape into (batch, T, N, F)
X_train_gnn_aug = np.reshape(weather_train_aug, (-1, T, N, F))
X_test_gnn_aug  = np.reshape(weather_test_aug,  (-1, T, N, F))

print(X_train_gnn_aug.shape)


(3604675, 1, 2, 5)


In [20]:
# Build the Graph-LSTM hybrid for the `_aug` data

# Single graph convolution, applied to every time step
# NOTE(review): the non-aug model above stacks two GCN layers ("time_gcn" and
# "time_gcn2"), so the two architectures differ — confirm that is intended.
g_aug = GCNTimeDistributed(units=gcn_units, name="time_gcn")([X_in_aug, A_in_aug])


# merge the node and feature axes of each time step, keeping T
g_flat_aug = TimeDistributed(Flatten(), name="flatten_nodes")(g_aug)

# LSTM over the now 3-D sequence
h_aug = LSTM(units=64, name="temporal_lstm")(g_flat_aug)

# output head with one unit per target column of Y_train_aug
Y_dim = len(Y_train_aug.columns)
out_aug = Dense(units=Y_dim, name="output")(h_aug)



In [21]:
# Assemble and compile the `_aug` model, then define its callbacks

# Assemble
gnn_lstm_aug = Model([X_in_aug, A_in_aug], out_aug, name="GNN_LSTM_Hybrid")
gnn_lstm_aug.summary()


gnn_lstm_aug.compile(loss="mse", optimizer="adam", metrics=["mae"])


# Callbacks (these rebind checkpoint_cb / earlystop_cb from the non-aug run)
# Save the full model (not just the weights) whenever val_loss improves.
checkpoint_cb = ModelCheckpoint(
    "./models/gnn_lstm_aug.keras",
    monitor="val_loss",
    verbose=1,
    save_best_only=True,
    save_weights_only=False,
)
# Stop after 5 epochs without a val_loss improvement and roll the in-memory model
# back to the best epoch's weights.
earlystop_cb = EarlyStopping(
    monitor="val_loss",
    patience=5,
    restore_best_weights=True,
    verbose=1,
)



In [22]:
# Two-node graph, origin ↔ destination: each node is linked only to the other (no self-loops).
A2 = np.array([[0, 1],
               [1, 0]], dtype=np.float32)

# One copy of the 2x2 adjacency per sample: shape (n_samples, 2, 2)
A_train_aug = np.repeat(A2[np.newaxis, ...], len(X_train_gnn_aug), axis=0)
A_test_aug  = np.repeat(A2[np.newaxis, ...], len(X_test_gnn_aug),  axis=0)

print("A_train_aug shape:", A_train_aug.shape)


# Train with the same 80/20 validation split as the LSTM models.
# NOTE(review): in the recorded log below, loss and val_loss are essentially flat from
# epoch 2 onward, which suggests the model is not learning — worth investigating.
history_aug = gnn_lstm_aug.fit(
    x=[X_train_gnn_aug, A_train_aug],
    y=Y_train_aug,
    epochs=50,
    batch_size=32,
    validation_split=0.2,
    callbacks=[checkpoint_cb, earlystop_cb, reduce_lr],
)

# Evaluate on test: evaluate() returns [loss, mae] for the metrics compiled above
test_results_aug = gnn_lstm_aug.evaluate(
    x=[X_test_gnn_aug, A_test_aug],
    y=Y_test_aug,
    verbose=0,
)
print("Aug Test loss, MAE:", test_results_aug)



A_train_aug shape: (3604675, 2, 2)
Epoch 1/50
[1m90087/90117[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 1ms/step - loss: 452.7466 - mae: 16.1462
Epoch 1: val_loss improved from inf to 486.64337, saving model to ./models/gnn_lstm_aug.keras
[1m90117/90117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m135s[0m 1ms/step - loss: 452.7426 - mae: 16.1462 - val_loss: 486.6434 - val_mae: 16.7682 - learning_rate: 0.0010
Epoch 2/50
[1m90105/90117[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 1ms/step - loss: 438.4277 - mae: 16.1839
Epoch 2: val_loss did not improve from 486.64337
[1m90117/90117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m135s[0m 2ms/step - loss: 438.4277 - mae: 16.1839 - val_loss: 486.6434 - val_mae: 16.7682 - learning_rate: 0.0010
Epoch 3/50
[1m90092/90117[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 1ms/step - loss: 438.4277 - mae: 16.1839
Epoch 3: val_loss did not improve from 486.64337
[1m90117/90117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

In [23]:
# Per-epoch training/validation history of the `_aug` GNN + LSTM fit
pd.DataFrame(data=history_aug.history).to_csv(path_or_buf="./models/results/gnn_test_aug.csv")

# Persist the GNN inputs and targets: train split first, then test.
# Y_train_aug / Y_test_aug are DataFrames; np.save stores them as arrays.
np.save(file="./data/X_train_gnn_aug.npy", arr=X_train_gnn_aug)
np.save(file="./data/A_train_aug.npy",     arr=A_train_aug)
np.save(file="./data/Y_train_aug.npy",     arr=Y_train_aug)

np.save(file="./data/X_test_gnn_aug.npy", arr=X_test_gnn_aug)
np.save(file="./data/A_test_aug.npy",     arr=A_test_aug)
np.save(file="./data/Y_test_aug.npy",     arr=Y_test_aug)

print("Saved GNN aug training data → ./data/")

Saved GNN aug training data → ./data/
