In [31]:
import os

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras as keras
from sklearn.metrics import mean_absolute_error
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import Input, LSTM, Dense, Bidirectional, Conv1D, MaxPooling1D, Flatten, Concatenate, TimeDistributed, Reshape, Activation, Lambda, Layer
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import register_keras_serializable




In [2]:
# Load the `_aug` train/test splits from ./data/: X_* are model inputs, Y_* are targets.
X_train_aug, X_test_aug = (
    pd.read_csv("./data/X_train_lstm.csv"),
    pd.read_csv("./data/X_test_lstm.csv"),
)
Y_train_aug, Y_test_aug = (
    pd.read_csv("./data/Y_train_lstm.csv"),
    pd.read_csv("./data/Y_test_lstm.csv"),
)

In [3]:
# Load the train/test splits stored under ./data/old_data/: X_* are model inputs, Y_* are targets.
X_train, X_test = (
    pd.read_csv("./data/old_data/X_train_lstm.csv"),
    pd.read_csv("./data/old_data/X_test_lstm.csv"),
)
Y_train, Y_test = (
    pd.read_csv("./data/old_data/Y_train_lstm.csv"),
    pd.read_csv("./data/old_data/Y_test_lstm.csv"),
)


### Set random seeds

In [4]:
# Seed NumPy, TensorFlow and Keras (in that order) with the same value so that
# weight initialisation and shuffling are repeatable between runs.
for _seed_fn in (np.random.seed, tf.random.set_seed, keras.utils.set_random_seed):
    _seed_fn(42)

### Train LSTM

In [5]:
# Callbacks: persist the best model (lowest val_loss) and stop once val_loss stalls.
checkpoint_aug = ModelCheckpoint("./models/lstm_model_aug.keras", monitor='val_loss', save_best_only=True, verbose=1)
# restore_best_weights=True rolls the in-memory model back to its best val_loss epoch when
# training stops, so the test metrics below describe the same weights the checkpoint saved
# instead of the weights from `patience` epochs later. The GNN cells further down already
# configure EarlyStopping this way.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1, restore_best_weights=True)

# Define LSTM model: every feature column is fed as one step of a length-n_features sequence
# with a single channel, followed by a 5-unit linear output head.
# Up to 2 layers of LSTM and number of hidden units were hand tuned to determine this as the optimum model
lstm_model_aug = Sequential([
    Input(shape=(X_train_aug.shape[1], 1)),
    LSTM(units=64, activation='relu', recurrent_dropout=0.2),
    Dense(5)
])

# Use MSE for loss because we want to emphasize the "wrongest" guesses the most. MAE is an interpretable metric
lstm_model_aug.compile(optimizer=Adam(learning_rate=1e-3), loss='mse', metrics=['mae'])

# Train model w/ early stopping; the last 20% of the training rows are held out for validation.
# Batch size is the average number of flights per day
history = lstm_model_aug.fit(X_train_aug, Y_train_aug, epochs=50, batch_size=265, validation_split=0.2, callbacks=[checkpoint_aug, early_stopping])


# Overall test-set loss / MAE (averaged over all outputs)
loss, mae = lstm_model_aug.evaluate(X_test_aug, Y_test_aug)
print("Test Mean Absolute Error:", mae)

Y_pred_aug = lstm_model_aug.predict(X_test_aug)

# Per-target MAE: one value per output column
mae_columns = mean_absolute_error(Y_test_aug, Y_pred_aug, multioutput='raw_values')
print("Mean Absolute Error for each column:")
print(mae_columns)

Epoch 1/50
[1m2867/2868[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 12ms/step - loss: 3423.3923 - mae: 25.2094
Epoch 1: val_loss improved from inf to 479.73154, saving model to ./models/lstm_model_aug.keras
[1m2868/2868[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 13ms/step - loss: 3422.0540 - mae: 25.2032 - val_loss: 479.7315 - val_mae: 9.9277
Epoch 2/50
[1m2867/2868[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 13ms/step - loss: 533.6815 - mae: 11.1124
Epoch 2: val_loss improved from 479.73154 to 434.94122, saving model to ./models/lstm_model_aug.keras
[1m2868/2868[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 13ms/step - loss: 533.8452 - mae: 11.1133 - val_loss: 434.9412 - val_mae: 10.6739
Epoch 3/50
[1m2866/2868[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 13ms/step - loss: 473.8381 - mae: 11.0616
Epoch 3: val_loss did not improve from 434.94122
[1m2868/2868[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 13ms/step - loss: 

In [6]:
# Callbacks: persist the best model (lowest val_loss) and stop once val_loss stalls.
checkpoint = ModelCheckpoint("./models/lstm_model.keras", monitor='val_loss', save_best_only=True, verbose=1)
# restore_best_weights=True rolls the in-memory model back to its best val_loss epoch when
# training stops, so the test metrics below describe the same weights the checkpoint saved
# instead of the weights from `patience` epochs later. The GNN cells further down already
# configure EarlyStopping this way.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1, restore_best_weights=True)

# Define LSTM model: every feature column is fed as one step of a length-n_features sequence
# with a single channel, followed by a 5-unit linear output head.
# Up to 2 layers of LSTM and number of hidden units were hand tuned to determine this as the optimum model
lstm_model = Sequential([
    Input(shape=(X_train.shape[1], 1)),
    LSTM(units=64, activation='relu', recurrent_dropout=0.2),
    Dense(5)
])

# Use MSE for loss because we want to emphasize the "wrongest" guesses the most. MAE is an interpretable metric
lstm_model.compile(optimizer=Adam(learning_rate=1e-3), loss='mse', metrics=['mae'])

# Train model w/ early stopping; the last 20% of the training rows are held out for validation.
# Batch size is the average number of flights per day
history = lstm_model.fit(X_train, Y_train, epochs=50, batch_size=265, validation_split=0.2, callbacks=[checkpoint, early_stopping])


# Overall test-set loss / MAE (averaged over all outputs)
loss, mae = lstm_model.evaluate(X_test, Y_test)
print("Test Mean Absolute Error:", mae)

Y_pred = lstm_model.predict(X_test)

# Per-target MAE: one value per output column
mae_columns = mean_absolute_error(Y_test, Y_pred, multioutput='raw_values')
print("Mean Absolute Error for each column:")
print(mae_columns)

Epoch 1/50
[1m2875/2876[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - loss: 455.6948 - mae: 11.7923
Epoch 1: val_loss improved from inf to 397.93814, saving model to ./models/lstm_model.keras
[1m2876/2876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 6ms/step - loss: 455.6567 - mae: 11.7917 - val_loss: 397.9381 - val_mae: 10.2813
Epoch 2/50
[1m2874/2876[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - loss: 374.7997 - mae: 10.2537
Epoch 2: val_loss improved from 397.93814 to 390.64362, saving model to ./models/lstm_model.keras
[1m2876/2876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 6ms/step - loss: 374.7983 - mae: 10.2537 - val_loss: 390.6436 - val_mae: 10.2638
Epoch 3/50
[1m2873/2876[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - loss: 372.4715 - mae: 10.1862
Epoch 3: val_loss improved from 390.64362 to 388.11856, saving model to ./models/lstm_model.keras
[1m2876/2876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

### Train BiLSTM

In [7]:
# Callbacks: persist the best model (lowest val_loss) and stop once val_loss stalls.
checkpoint_aug = ModelCheckpoint("./models/bilstm_model_aug.keras", monitor='val_loss', save_best_only=True, verbose=1)
# restore_best_weights=True rolls the in-memory model back to its best val_loss epoch when
# training stops, so the test metrics below describe the same weights the checkpoint saved
# instead of the weights from `patience` epochs later. The GNN cells further down already
# configure EarlyStopping this way.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1, restore_best_weights=True)

# Define BiLSTM model: the feature columns are read as a sequence in both directions.
# The input width is taken from X_train_aug, the frame this model is actually trained on
# (it was previously taken from X_train, which only works while both frames have the same
# number of columns).
# Up to 2 layers of LSTM and number of hidden units were hand tuned to determine this as the optimum model
bilstm_model_aug = Sequential([
    Input(shape=(X_train_aug.shape[1], 1)),
    Bidirectional(
        LSTM(units=64, activation='relu', recurrent_dropout=0.2)
    ),
    Dense(5)
])

# Use MSE for loss because we want to emphasize the "wrongest" guesses the most. MAE is an interpretable metric
bilstm_model_aug.compile(optimizer=Adam(learning_rate=1e-3), loss='mse', metrics=['mae'])

# Train model w/ early stopping; the last 20% of the training rows are held out for validation.
# Batch size is the average number of flights per day
history = bilstm_model_aug.fit(X_train_aug, Y_train_aug, epochs=50, batch_size=265, validation_split=0.2, callbacks=[checkpoint_aug, early_stopping])


# Overall test-set loss / MAE (averaged over all outputs)
loss, mae = bilstm_model_aug.evaluate(X_test_aug, Y_test_aug)
print("Test Mean Absolute Error:", mae)

Y_pred_aug = bilstm_model_aug.predict(X_test_aug)

# Per-target MAE: one value per output column
mae_columns = mean_absolute_error(Y_test_aug, Y_pred_aug, multioutput='raw_values')
print("Mean Absolute Error for each column:")
print(mae_columns)

Epoch 1/50
[1m2865/2868[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 15ms/step - loss: 1161.1936 - mae: 18.2545
Epoch 1: val_loss improved from inf to 381.04468, saving model to ./models/bilstm_model_aug.keras
[1m2868/2868[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 16ms/step - loss: 1160.6000 - mae: 18.2496 - val_loss: 381.0447 - val_mae: 10.2252
Epoch 2/50
[1m2865/2868[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 15ms/step - loss: 560.8406 - mae: 12.5182
Epoch 2: val_loss improved from 381.04468 to 371.49557, saving model to ./models/bilstm_model_aug.keras
[1m2868/2868[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 16ms/step - loss: 561.1428 - mae: 12.5194 - val_loss: 371.4956 - val_mae: 9.7406
Epoch 3/50
[1m2868/2868[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - loss: 443.5334 - mae: 11.5017
Epoch 3: val_loss improved from 371.49557 to 370.59045, saving model to ./models/bilstm_model_aug.keras
[1m2868/2868[0m [32m━━━━━

In [8]:
# Callbacks: persist the best model (lowest val_loss) and stop once val_loss stalls.
checkpoint = ModelCheckpoint("./models/bilstm_model.keras", monitor='val_loss', save_best_only=True, verbose=1)
# restore_best_weights=True rolls the in-memory model back to its best val_loss epoch when
# training stops, so the test metrics below describe the same weights the checkpoint saved
# instead of the weights from `patience` epochs later. The GNN cells further down already
# configure EarlyStopping this way.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1, restore_best_weights=True)

# Define BiLSTM model: the feature columns are read as a sequence in both directions.
# Up to 2 layers of LSTM and number of hidden units were hand tuned to determine this as the optimum model
bilstm_model = Sequential([
    Input(shape=(X_train.shape[1], 1)),
    Bidirectional(
        LSTM(units=64, activation='relu', recurrent_dropout=0.2)
    ),
    Dense(5)
])

# Use MSE for loss because we want to emphasize the "wrongest" guesses the most. MAE is an interpretable metric
bilstm_model.compile(optimizer=Adam(learning_rate=1e-3), loss='mse', metrics=['mae'])

# Train model w/ early stopping; the last 20% of the training rows are held out for validation.
# Batch size is the average number of flights per day
history = bilstm_model.fit(X_train, Y_train, epochs=50, batch_size=265, validation_split=0.2, callbacks=[checkpoint, early_stopping])


# Overall test-set loss / MAE (averaged over all outputs)
loss, mae = bilstm_model.evaluate(X_test, Y_test)
print("Test Mean Absolute Error:", mae)

Y_pred = bilstm_model.predict(X_test)

# Per-target MAE: one value per output column
mae_columns = mean_absolute_error(Y_test, Y_pred, multioutput='raw_values')
print("Mean Absolute Error for each column:")
print(mae_columns)

Epoch 1/50
[1m2873/2876[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 8ms/step - loss: 688.5925 - mae: 15.7021
Epoch 1: val_loss improved from inf to 394.46857, saving model to ./models/bilstm_model.keras
[1m2876/2876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 9ms/step - loss: 688.2634 - mae: 15.6969 - val_loss: 394.4686 - val_mae: 10.4897
Epoch 2/50
[1m2871/2876[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 9ms/step - loss: 373.3499 - mae: 10.3713
Epoch 2: val_loss improved from 394.46857 to 391.52213, saving model to ./models/bilstm_model.keras
[1m2876/2876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 9ms/step - loss: 373.3479 - mae: 10.3712 - val_loss: 391.5221 - val_mae: 10.3779
Epoch 3/50
[1m2874/2876[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 8ms/step - loss: 370.1465 - mae: 10.2578
Epoch 3: val_loss improved from 391.52213 to 388.73044, saving model to ./models/bilstm_model.keras
[1m2876/2876[0m [32m━━━━━━━━━━━━━━━━━━━━[0

### Train CNN + LSTM Hybrid

In [10]:
# Callbacks: persist the best model (lowest val_loss) and stop once val_loss stalls.
checkpoint_aug = ModelCheckpoint("./models/hybrid_model_aug.keras", monitor='val_loss', save_best_only=True, verbose=1)
# restore_best_weights=True rolls the in-memory model back to its best val_loss epoch when
# training stops, so the test metrics below describe the same weights the checkpoint saved
# instead of the weights from `patience` epochs later. The GNN cells further down already
# configure EarlyStopping this way.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1, restore_best_weights=True)

# Shared input: the feature columns as a length-n_features sequence with one channel
input_layer_aug = Input(shape=(X_train_aug.shape[1], 1))

# CNN branch: local patterns across neighbouring feature positions
conv_layer_aug = Conv1D(filters=32, kernel_size=3, activation='relu')(input_layer_aug)
maxpool_layer_aug = MaxPooling1D(pool_size=2)(conv_layer_aug)
flatten_layer_aug = Flatten()(maxpool_layer_aug)
dense_cnn_aug = Dense(32, activation='relu')(flatten_layer_aug)

# LSTM branch (a single unidirectional LSTM, not a BiLSTM) over the same input
lstm_layer_aug = LSTM(64, activation='relu')(input_layer_aug)
dense_lstm_aug = Dense(32, activation='relu')(lstm_layer_aug)

# Concatenate the 32-d CNN and 32-d LSTM representations
concatenated_aug = Concatenate()([dense_cnn_aug, dense_lstm_aug])

# Output layer: 5 linear regression targets
output_layer_aug = Dense(5)(concatenated_aug)

# Create the two-branch hybrid model
hybrid_model_aug = Model(inputs=input_layer_aug, outputs=output_layer_aug)

hybrid_model_aug.compile(optimizer='adam', loss='mse', metrics=['mae'])

# The last 20% of the training rows are held out for validation.
history_aug = hybrid_model_aug.fit(
    X_train_aug,
    Y_train_aug,
    epochs=50,
    batch_size=512,
    validation_split=0.2,
    callbacks=[checkpoint_aug, early_stopping]
)

# Overall test-set loss / MAE (averaged over all outputs)
loss, mae = hybrid_model_aug.evaluate(X_test_aug, Y_test_aug)
print("Test Mean Absolute Error:", mae)

Y_pred_aug = hybrid_model_aug.predict(X_test_aug)

# Per-target MAE: one value per output column
mae_columns = mean_absolute_error(Y_test_aug, Y_pred_aug, multioutput='raw_values')
print("Mean Absolute Error for each column:")
print(mae_columns)

Epoch 1/50
[1m1483/1485[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 19ms/step - loss: 477.1819 - mae: 12.7312
Epoch 1: val_loss improved from inf to 374.61707, saving model to ./models/hybrid_model_aug.keras
[1m1485/1485[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 21ms/step - loss: 477.0248 - mae: 12.7280 - val_loss: 374.6171 - val_mae: 10.5223
Epoch 2/50
[1m1482/1485[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - loss: 378.4031 - mae: 10.5702
Epoch 2: val_loss improved from 374.61707 to 372.81757, saving model to ./models/hybrid_model_aug.keras
[1m1485/1485[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 22ms/step - loss: 378.3961 - mae: 10.5699 - val_loss: 372.8176 - val_mae: 10.1336
Epoch 3/50
[1m1483/1485[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 19ms/step - loss: 368.5249 - mae: 10.2117
Epoch 3: val_loss improved from 372.81757 to 371.32379, saving model to ./models/hybrid_model_aug.keras
[1m1485/1485[0m [32m━━━━━━

In [11]:
# Callbacks: persist the best model (lowest val_loss) and stop once val_loss stalls.
checkpoint = ModelCheckpoint("./models/hybrid_model.keras", monitor='val_loss', save_best_only=True, verbose=1)
# restore_best_weights=True rolls the in-memory model back to its best val_loss epoch when
# training stops, so the test metrics below describe the same weights the checkpoint saved
# instead of the weights from `patience` epochs later. The GNN cells further down already
# configure EarlyStopping this way.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1, restore_best_weights=True)

# Shared input: the feature columns as a length-n_features sequence with one channel
input_layer = Input(shape=(X_train.shape[1], 1))

# CNN branch: local patterns across neighbouring feature positions
conv_layer = Conv1D(filters=32, kernel_size=3, activation='relu')(input_layer)
maxpool_layer = MaxPooling1D(pool_size=2)(conv_layer)
flatten_layer = Flatten()(maxpool_layer)
dense_cnn = Dense(32, activation='relu')(flatten_layer)

# LSTM branch (a single unidirectional LSTM, not a BiLSTM) over the same input
lstm_layer = LSTM(64, activation='relu')(input_layer)
dense_lstm = Dense(32, activation='relu')(lstm_layer)

# Concatenate the 32-d CNN and 32-d LSTM representations
concatenated = Concatenate()([dense_cnn, dense_lstm])

# Output layer: 5 linear regression targets
output_layer = Dense(5)(concatenated)

# Create the two-branch hybrid model
hybrid_model = Model(inputs=input_layer, outputs=output_layer)

hybrid_model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# The last 20% of the training rows are held out for validation.
history = hybrid_model.fit(
    X_train,
    Y_train,
    epochs=50,
    batch_size=512,
    validation_split=0.2,
    callbacks=[checkpoint, early_stopping]
)

# Overall test-set loss / MAE (averaged over all outputs)
loss, mae = hybrid_model.evaluate(X_test, Y_test)
print("Test Mean Absolute Error:", mae)

Y_pred = hybrid_model.predict(X_test)

# Per-target MAE: one value per output column
mae_columns = mean_absolute_error(Y_test, Y_pred, multioutput='raw_values')
print("Mean Absolute Error for each column:")
print(mae_columns)

Epoch 1/50
[1m1486/1489[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 11ms/step - loss: 440.2392 - mae: 11.9459
Epoch 1: val_loss improved from inf to 404.43927, saving model to ./models/hybrid_model.keras
[1m1489/1489[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 13ms/step - loss: 440.1000 - mae: 11.9428 - val_loss: 404.4393 - val_mae: 10.4216
Epoch 2/50
[1m1488/1489[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 11ms/step - loss: 371.1723 - mae: 10.3173
Epoch 2: val_loss improved from 404.43927 to 391.91068, saving model to ./models/hybrid_model.keras
[1m1489/1489[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 12ms/step - loss: 371.1712 - mae: 10.3172 - val_loss: 391.9107 - val_mae: 10.3450
Epoch 3/50
[1m1485/1489[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 11ms/step - loss: 368.9981 - mae: 10.2475
Epoch 3: val_loss improved from 391.91068 to 390.98267, saving model to ./models/hybrid_model.keras
[1m1489/1489[0m [32m━━━━━━━━━━━━━━━━━━

### GNN + LSTM Hybrid Model

In [5]:

# ─── CONFIG ────────────────────────────────────────────────────
MASTER_COORD = "./Datasets/T_MASTER_CORD.csv"
K_NEIGHBORS  = 5      # connect each airport to its 5 closest neighbors
EARTH_R      = 6371.0 # km

# ─── 1) load airport coordinates ────────────────────────────────
# Everything is read as text; only the two coordinate columns are then cast to float.
mc = (
    pd.read_csv(MASTER_COORD, dtype=str)
      .set_index("AIRPORT_SEQ_ID")[["LATITUDE","LONGITUDE"]]
      .astype(float)
)
# list of IDs and arrays of lat/lon in radians
ids  = mc.index.to_list()
lats = np.deg2rad(mc["LATITUDE"].values)
lons = np.deg2rad(mc["LONGITUDE"].values)
N    = len(ids)

# ─── 2) compute pairwise haversine distances ────────────────────
# Vectorized via broadcasting; every intermediate is a dense (N, N) float64 array.
lat1 = lats[:, None]
lat2 = lats[None, :]
dlon = lons[None, :] - lons[:, None]

dlat = lat2 - lat1
a = np.sin(dlat/2.0)**2 + np.cos(lat1)*np.cos(lat2)*np.sin(dlon/2.0)**2
# Floating-point rounding can push `a` marginally outside [0, 1], which would turn
# sqrt/arcsin into NaN; clamp in place to avoid allocating another (N, N) array.
np.clip(a, 0.0, 1.0, out=a)
c = 2 * np.arcsin(np.sqrt(a))
dist_km = EARTH_R * c  # shape (N, N)
# NOTE(review): rows with missing coordinates would yield NaN distances (argsort places
# NaN last) — confirm the master file has none.

# ─── 3) build adjacency by k-nearest neighbors ───────────────────
A = np.zeros((N, N), dtype=np.float32)

for i in range(N):
    # Drop i explicitly instead of assuming it sorts to position 0: when several rows share
    # the same coordinates their distance to i is also 0, so i is not guaranteed to come
    # first and a blind [1:K+1] slice could keep a self-loop and drop a real neighbor.
    order = np.argsort(dist_km[i], kind="stable")
    neighbors = order[order != i][:K_NEIGHBORS]
    A[i, neighbors] = 1.0

# symmetrize: if i→j or j→i, keep edge both ways
A = np.maximum(A, A.T)

# ─── 4) (Optional) row-normalize adjacency ──────────────────────
# so each non-empty row sums to 1 (all-zero rows are divided by 1 and stay zero)
row_sums = A.sum(axis=1, keepdims=True)
A_norm   = A / np.where(row_sums>0, row_sums, 1.0)

# Now `A_norm` is your adjacency matrix to feed into the GNN
print("Adjacency matrix shape:", A_norm.shape)



Adjacency matrix shape: (19213, 19213)


In [50]:
@register_keras_serializable(package="Custom", name="SimpleGraphConv")
class SimpleGraphConv(Layer):
    """Bias-free linear graph convolution computing ``A @ (X @ W)``.

    The layer is called on a pair ``[X, A]`` where, per the shapes this file uses,
    ``X`` is ``(batch, N, F)`` node features and ``A`` is ``(batch, N, N)`` adjacency.
    It returns a ``(batch, N, units)`` tensor. No bias and no activation are applied,
    so stacking several of these layers without a non-linearity in between stays linear.

    Args:
        units: Number of output features per node.
        **kwargs: Forwarded to ``keras.layers.Layer``.
    """

    def __init__(self, units, **kwargs):
        super().__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        """Create the ``(F, units)`` kernel from the feature width of the first input."""
        # input_shape = [(batch, N, F), (batch, N, N)]
        F = input_shape[0][-1]
        self.w = self.add_weight(
            shape=(F, self.units),
            initializer="glorot_uniform",
            name="kernel")
        super().build(input_shape)

    def call(self, inputs):
        """Project node features with the kernel, then aggregate them through ``A``."""
        X, A = inputs    # X: (batch, N, F), A: (batch, N, N)
        return tf.matmul(A, tf.matmul(X, self.w))

    def get_config(self):
        """Return the constructor arguments so a saved model can rebuild this layer."""
        config = super().get_config()
        config.update({"units": self.units})
        return config


@register_keras_serializable(package="Custom", name="GCNTimeDistributed")
class GCNTimeDistributed(Layer):
    """Apply one shared ``SimpleGraphConv`` to every time slice of a node time-series.

    The layer is called on ``[X, A]`` with ``X`` of shape ``(batch, T, N, F)`` and ``A`` of
    shape ``(batch, N, N)``. The same adjacency is used for every time step, and the result
    has shape ``(batch, T, N, units)``.

    Args:
        units: Number of output features per node.
        **kwargs: Forwarded to ``keras.layers.Layer``.
    """

    def __init__(self, units, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        # we’ll reuse this conv at each time slice
        self.gcn = SimpleGraphConv(units)

    def build(self, input_shape):
        """Build the inner graph convolution on the per-time-slice shapes."""
        # input_shape = [(batch, T, N, F), (batch, N, N)]
        # we need to build the inner GCN on a flattened time‐slice shape:
        # pretend batch' = None and time = 1 so shape = (None, N, F) & (None, N, N)
        _, T, N, F = input_shape[0]
        # call inner build:
        self.gcn.build([(None, N, F), (None, N, N)])
        super().build(input_shape)

    def call(self, inputs):
        """Fold time into the batch axis, run the graph convolution, then unfold again."""
        X, A = inputs
        batch = tf.shape(X)[0]
        T     = tf.shape(X)[1]
        N     = tf.shape(X)[2]
        F     = tf.shape(X)[3]

        # 1) collapse time
        Xr = tf.reshape(X, (batch * T, N, F))
        # 2) tile adjacency so each (sample, time step) pair gets its own copy
        Aexp = tf.expand_dims(A, 1)               # (batch, 1, N, N)
        Atil = tf.tile(Aexp, [1, T, 1, 1])         # (batch, T, N, N)
        Ar   = tf.reshape(Atil, (batch * T, N, N))
        # 3) apply GCN
        Yr = self.gcn([Xr, Ar])                    # (batch*T, N, units)
        # 4) restore time axis
        return tf.reshape(Yr, (batch, T, N, self.units))

    def get_config(self):
        """Return the constructor arguments so a saved model can rebuild this layer."""
        config = super().get_config()
        config.update({"units": self.units})
        return config

In [51]:
# Learning-rate schedule shared by both GNN training runs: halve the learning rate whenever
# val_mae has not improved for 3 epochs, never going below 1e-6.
# (ReduceLROnPlateau is imported with the other Keras callbacks in the notebook's import cell.)
reduce_lr = ReduceLROnPlateau(
    monitor="val_mae",
    factor=0.5,
    patience=3,
    verbose=1,
    min_lr=1e-6
)

### Build GNN data

In [52]:
# Per-sample graph dimensions:
#   T – one snapshot per flight
#   N – origin + destination nodes
#   F – five weather variables per node
T, N, F = 1, 2, 5

A_np = A_norm      # alias of the row-normalized airport adjacency
gcn_units = 32     # output width of each graph-convolution layer

# Symbolic model inputs: node features per time step, and one adjacency per sample
X_in = Input(name="node_features", shape=(T, N, F))
A_in = Input(name="adjacency_matrix", shape=(N, N))

# The leading N*F columns of X_train are taken as the weather block
weather_cols = X_train.columns[: N * F]
assert len(weather_cols) == N * F

# Extract that block as plain arrays of shape (n_samples, N*F) ...
weather_train = X_train.loc[:, weather_cols].to_numpy()
weather_test = X_test.loc[:, weather_cols].to_numpy()

# ... and view each row as a (T, N, F) tensor
X_train_gnn = np.reshape(weather_train, (-1, T, N, F))
X_test_gnn = np.reshape(weather_test, (-1, T, N, F))

print(X_train_gnn.shape)

(952611, 1, 2, 5)


In [53]:
# Wire up the GNN → LSTM graph on the symbolic inputs X_in (node features) and A_in (adjacency).

# ─── 2a) GCN‐over‐time layer 1: (batch, T, N, F) → (batch, T, N, gcn_units)
g = GCNTimeDistributed(gcn_units, name="time_gcn")([X_in, A_in])

# ─── 2nd GCN‐over‐time, stacked on the first; `g` is rebound to its output
g = GCNTimeDistributed(gcn_units, name="time_gcn2")([g, A_in])

# ─── 2b) flatten the node axis within each time step but keep T

g_flat = TimeDistributed(Flatten(), name="flatten_nodes")(g)  
# shape = (batch, T, N * gcn_units)

# ─── 2c) LSTM over the time axis; only the final hidden state (64-d) is returned
h = LSTM(64, name="temporal_lstm")(g_flat)  # g_flat is 3D, as the LSTM requires

# ─── 2d) final linear Dense with one unit per target column of Y_train
Y_dim = Y_train.shape[1]
out = Dense(Y_dim, name="output")(h)

In [54]:
# ─── 3) Assemble and compile the GNN-LSTM model, then set up its callbacks ─────

# Callbacks: write the best (lowest val_loss) model to disk, and stop after 5 epochs
# without improvement while restoring the best weights in memory.
checkpoint_cb = ModelCheckpoint(filepath="./models/gnn_lstm.keras", monitor="val_loss", save_best_only=True, verbose=1)
earlystop_cb = EarlyStopping(monitor="val_loss", patience=5, verbose=1, restore_best_weights=True)

# Two-input model: node features + adjacency → regression targets
gnn_lstm = Model(inputs=[X_in, A_in], outputs=out, name="GNN_LSTM_Hybrid")
gnn_lstm.summary()
gnn_lstm.compile(
    optimizer="adam",
    loss="mse",
    metrics=["mae"],
)



In [None]:

# Per-flight graph with two nodes (origin ↔ dest) joined by one undirected edge, no self-loops:
A2 = np.array([[0., 1.],
               [1., 0.]], dtype=np.float32)

# Tile it for every sample in the train/test set:
A_train = np.tile(A2[None], (X_train_gnn.shape[0], 1, 1))  # (n_train, 2, 2)
A_test  = np.tile(A2[None], (X_test_gnn.shape[0],  1, 1))  # (n_test, 2, 2)

print("A_train shape:", A_train.shape)



# Fit just like you did your LSTM models:
history = gnn_lstm.fit(
    [X_train_gnn, A_train],
    Y_train,
    batch_size=32,
    epochs=50,
    validation_split=0.2,
    callbacks=[checkpoint_cb, earlystop_cb, reduce_lr],
)

test_results = gnn_lstm.evaluate(
    [X_test_gnn, A_test],
    Y_test,
    verbose=0,
)
print("Test loss, MAE:", test_results)

# evaluate() returns plain scalars ([loss, mae] for the compile settings of gnn_lstm), which
# have no .to_csv(); wrap them in a one-row frame first. The file goes under the project's
# ./models directory (the previous "/models/..." path pointed at the filesystem root), and
# the results folder is created on first use.
os.makedirs("./models/results", exist_ok=True)
pd.DataFrame([test_results], columns=["loss", "mae"]).to_csv("./models/results/gnn_test.csv", index=False)

A_train shape: (952611, 2, 2)
Epoch 1/50
[1m23813/23816[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - loss: 429.5786 - mae: 11.1594
Epoch 1: val_loss improved from inf to 437.54355, saving model to ./models/gnn_lstm.keras
[1m23816/23816[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 2ms/step - loss: 429.5766 - mae: 11.1595 - val_loss: 437.5435 - val_mae: 11.7251 - learning_rate: 0.0010
Epoch 2/50
[1m23799/23816[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - loss: 414.5328 - mae: 11.4718
Epoch 2: val_loss did not improve from 437.54355
[1m23816/23816[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 2ms/step - loss: 414.5326 - mae: 11.4718 - val_loss: 437.5435 - val_mae: 11.7251 - learning_rate: 0.0010
Epoch 3/50
[1m23810/23816[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - loss: 414.5327 - mae: 11.4718
Epoch 3: val_loss did not improve from 437.54355
[1m23816/23816[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4

In [57]:
# Persist the per-epoch training curves. `history.history` is a dict of metric name → list of
# per-epoch values, which has no .to_csv(); convert it to a frame first. The file is written
# under ./models (the previous "/models/..." path pointed at the filesystem root) and gets its
# own name so it cannot overwrite the test-metrics CSV, which uses gnn_test.csv.
os.makedirs("./models/results", exist_ok=True)
pd.DataFrame(history.history).to_csv("./models/results/gnn_history.csv", index_label="epoch")

# Cache the exact arrays fed to the GNN model so they can be reloaded without rebuilding them.
np.save("./data/X_train_gnn.npy", X_train_gnn)
np.save("./data/A_train.npy",     A_train)
np.save("./data/Y_train.npy",     Y_train)

np.save("./data/X_test_gnn.npy", X_test_gnn)
np.save("./data/A_test.npy",     A_test)
np.save("./data/Y_test.npy",     Y_test)

print("Saved GNN training data → ./data/")

Saved GNN training data → ./data/


In [56]:

# Per-sample graph dimensions:
#   T – one snapshot per flight
#   N – origin + destination nodes
#   F – five weather variables per node
T, N, F = 1, 2, 5

A_np_aug = A_norm  # alias of the row-normalized airport adjacency
gcn_units = 32     # output width of each graph-convolution layer

# Symbolic model inputs: node features per time step, and one adjacency per sample
X_in_aug = Input(name="node_features", shape=(T, N, F))
A_in_aug = Input(name="adjacency_matrix", shape=(N, N))

# The leading N*F columns of X_train_aug are taken as the weather block
# (assumes they are ordered [orig_var0..4, dest_var0..4])
weather_cols_aug = X_train_aug.columns[: N * F]
assert len(weather_cols_aug) == N * F

# Extract that block as plain arrays of shape (n_samples, N*F) ...
weather_train_aug = X_train_aug.loc[:, weather_cols_aug].to_numpy()
weather_test_aug = X_test_aug.loc[:, weather_cols_aug].to_numpy()

# ... and view each row as a (T, N, F) tensor
X_train_gnn_aug = np.reshape(weather_train_aug, (-1, T, N, F))
X_test_gnn_aug = np.reshape(weather_test_aug, (-1, T, N, F))

print(X_train_gnn_aug.shape)


(949836, 1, 2, 5)


In [58]:
# ─── 2) Build the Graph-LSTM hybrid on X_in_aug (node features) and A_in_aug (adjacency) ───

# ─── 2a) GCN‐over‐time layer 1: (batch, T, N, F) → (batch, T, N, gcn_units)
g_aug = GCNTimeDistributed(gcn_units, name="time_gcn")([X_in_aug, A_in_aug])

# ─── (optional) 2nd GCN‐over‐time — currently disabled, so this model has a single
#     graph-convolution layer, whereas the non-aug GNN model above stacks two.
#g_aug = GCNTimeDistributed(gcn_units, name="time_gcn2")([g_aug, A_in_aug])

# ─── 2b) flatten the node axis within each time step but keep T

g_flat_aug = TimeDistributed(Flatten(), name="flatten_nodes")(g_aug)  
# shape = (batch, T, N * gcn_units)

# ─── 2c) LSTM over the time axis; only the final hidden state (64-d) is returned
h_aug = LSTM(64, name="temporal_lstm")(g_flat_aug)  # g_flat_aug is 3D, as the LSTM requires

# ─── 2d) final linear Dense with one unit per target column of Y_train_aug
Y_dim = Y_train_aug.shape[1]
out_aug = Dense(Y_dim, name="output")(h_aug)



In [None]:
# ─── 3) Assemble and compile the aug GNN-LSTM model, then set up its callbacks ─

# Callbacks: write the best (lowest val_loss) model to disk, and stop after 5 epochs
# without improvement while restoring the best weights in memory.
checkpoint_cb = ModelCheckpoint(filepath="./models/gnn_lstm_aug.keras", monitor="val_loss", save_best_only=True, verbose=1)
earlystop_cb = EarlyStopping(monitor="val_loss", patience=5, verbose=1, restore_best_weights=True)

# Two-input model: node features + adjacency → regression targets
gnn_lstm_aug = Model(
    inputs=[X_in_aug, A_in_aug],
    outputs=out_aug,
    name="GNN_LSTM_Hybrid",
)
gnn_lstm_aug.summary()

gnn_lstm_aug.compile(
    optimizer="adam",
    loss="mse",
    metrics=["mae"],
)



In [None]:

# Two-node graph per flight (origin ↔ dest): one undirected edge, no self-loops.
A2 = np.array(
    [[0., 1.],
     [1., 0.]],
    dtype=np.float32,
)

# One copy of that adjacency per sample → (n_samples, 2, 2)
A_train_aug = np.tile(A2[np.newaxis], (len(X_train_gnn_aug), 1, 1))
A_test_aug = np.tile(A2[np.newaxis], (len(X_test_gnn_aug), 1, 1))

print("A_train_aug shape:", A_train_aug.shape)

# Train with the checkpoint, early-stopping and LR-reduction callbacks;
# the last 20% of the training samples are held out for validation.
history_aug = gnn_lstm_aug.fit(
    x=[X_train_gnn_aug, A_train_aug],
    y=Y_train_aug,
    batch_size=32,
    epochs=50,
    validation_split=0.2,
    callbacks=[checkpoint_cb, earlystop_cb, reduce_lr],
)

# Score the trained model on the held-out test split
test_results_aug = gnn_lstm_aug.evaluate(x=[X_test_gnn_aug, A_test_aug], y=Y_test_aug, verbose=0)
print("Aug Test loss, MAE:", test_results_aug)



A_train_aug shape: (949836, 2, 2)
Epoch 1/50
[1m23717/23746[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 1ms/step - loss: 422.3806 - mae: 11.1153 - mse: 422.3806
Epoch 1: val_loss improved from inf to 417.75284, saving model to ./models/gnn_lstm_aug.keras
[1m23746/23746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 2ms/step - loss: 422.3701 - mae: 11.1155 - mse: 422.3701 - val_loss: 417.7528 - val_mae: 11.4811 - val_mse: 417.7528 - learning_rate: 0.0010
Epoch 2/50
[1m23726/23746[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 1ms/step - loss: 410.2379 - mae: 11.3897 - mse: 410.2379
Epoch 2: val_loss did not improve from 417.75284
[1m23746/23746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 2ms/step - loss: 410.2389 - mae: 11.3897 - mse: 410.2389 - val_loss: 417.7528 - val_mae: 11.4811 - val_mse: 417.7528 - learning_rate: 0.0010
Epoch 3/50
[1m23734/23746[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 1ms/step - loss: 410.2383 - mae: 11.3897 -

In [None]:
# Persist the per-epoch training curves. `history_aug.history` is a dict of metric name →
# list of per-epoch values, which has no .to_csv(); convert it to a frame first. The file is
# written under ./models (the previous "/models/..." path pointed at the filesystem root)
# and is named for what it holds: training history, not test results.
os.makedirs("./models/results", exist_ok=True)
pd.DataFrame(history_aug.history).to_csv("./models/results/gnn_history_aug.csv", index_label="epoch")

# Cache the exact arrays fed to the aug GNN model so they can be reloaded without rebuilding them.
np.save("./data/X_train_gnn_aug.npy", X_train_gnn_aug)
np.save("./data/A_train_aug.npy",     A_train_aug)
np.save("./data/Y_train_aug.npy",     Y_train_aug)

np.save("./data/X_test_gnn_aug.npy", X_test_gnn_aug)
np.save("./data/A_test_aug.npy",     A_test_aug)
np.save("./data/Y_test_aug.npy",     Y_test_aug)

print("Saved GNN aug training data → ./data/")

Saved GNN aug training data → ./data/
