# Flight Delay Prediction

## Data Loading and Preprocessing

In [None]:
import pandas as pd

In [None]:
# Read the cleaned flight records from the Parquet file into a DataFrame
flights_df = pd.read_parquet(path="cleaned_flights.parquet")

In [None]:
# Add 'DEPARTURE_HOUR': the hour component read from the 'DATE' column.
# NOTE(review): presumably 'DATE' carries a time of day; if it is date-only,
# every derived hour will be 0 — confirm against the data.
flights_df['DEPARTURE_HOUR'] = flights_df.loc[:, 'DATE'].dt.hour

In [None]:
# Fill gaps in 'DAILY_SNOWFALL' with 0 (a missing reading is treated as no snow)
flights_df['DAILY_SNOWFALL'] = flights_df['DAILY_SNOWFALL'].fillna(value=0)

In [None]:
# Keep only rows with a strictly positive arrival delay; .copy() detaches the
# result from flights_df so later column assignments do not touch the original.
delayed_flights = flights_df.loc[flights_df['ARRIVAL_DELAY'].gt(0)].copy()

## MLX Neural Network Model

In [None]:
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [None]:
import mlx.core as mx
import mlx.nn as nn
import mlx.optimizers as optim

In [None]:
# Encode each periodic calendar field as a (sin, cos) pair so that values at
# opposite ends of a cycle (e.g. hour 23 and hour 0) end up close together.
# Each entry is (source column, cycle length, sin column, cos column).
for _column, _period, _sin_name, _cos_name in (
    ('DEPARTURE_HOUR', 24.0, 'HOUR_SIN', 'HOUR_COS'),
    ('MONTH', 12.0, 'MONTH_SIN', 'MONTH_COS'),
    # DAY_OF_WEEK may be coded 0-6 or 1-7; a cycle length of 7 suits both
    ('DAY_OF_WEEK', 7.0, 'DAY_SIN', 'DAY_COS'),
):
    delayed_flights[_sin_name] = np.sin(2 * np.pi * delayed_flights[_column] / _period)
    delayed_flights[_cos_name] = np.cos(2 * np.pi * delayed_flights[_column] / _period)

In [None]:
# Regression target and the model inputs: six cyclical time encodings,
# two numeric columns and three categorical columns.
target = 'ARRIVAL_DELAY'
features = [
    'HOUR_SIN', 'HOUR_COS',
    'MONTH_SIN', 'MONTH_COS',
    'DAY_SIN', 'DAY_COS',
    'DISTANCE',
    'DAILY_SNOWFALL',
    'AIRLINE',
    'origin_airport/AIRPORT',
    'destination_airport/AIRPORT',
]

# Feature matrix (X) and target vector (y)
X = delayed_flights[features]
y = delayed_flights[target]

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Column groups: numeric columns are standardized, categorical ones are one-hot encoded
numeric_features = [
    'HOUR_SIN', 'HOUR_COS',
    'MONTH_SIN', 'MONTH_COS',
    'DAY_SIN', 'DAY_COS',
    'DISTANCE',
    'DAILY_SNOWFALL',
]
categorical_features = [
    'AIRLINE',
    'origin_airport/AIRPORT',
    'destination_airport/AIRPORT',
]

# One single-step pipeline per column group
numeric_transformer = Pipeline(steps=[('scaler', StandardScaler())])
# handle_unknown='ignore': a category that was not seen while fitting does not
# raise when it shows up in the test set
categorical_transformer = Pipeline(
    steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))]
)

# Route each column group to its own transformer
preprocessor = ColumnTransformer(transformers=[
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
])

# Fit on the training split only, then reuse the fitted transform on the test
# split so no test-set statistics leak into preprocessing
X_train_processed = preprocessor.fit_transform(X_train)
X_test_processed = preprocessor.transform(X_test)

# The transformer may hand back a sparse matrix; densify it when it does
X_train_np = (
    X_train_processed.toarray()
    if hasattr(X_train_processed, "toarray")
    else X_train_processed
)
X_test_np = (
    X_test_processed.toarray()
    if hasattr(X_test_processed, "toarray")
    else X_test_processed
)

# Targets as plain NumPy arrays
y_train_np = y_train.values
y_test_np = y_test.values

In [None]:
# Wrap the NumPy data as float32 MLX arrays. The targets are reshaped into
# column vectors of shape (N, 1).
X_train_mlx = mx.array(X_train_np, dtype=mx.float32)
X_test_mlx = mx.array(X_test_np, dtype=mx.float32)
y_train_mlx = mx.reshape(mx.array(y_train_np, dtype=mx.float32), (-1, 1))
y_test_mlx = mx.reshape(mx.array(y_test_np, dtype=mx.float32), (-1, 1))

In [None]:
class BatchNormMLXModel(nn.Module):
    """Feed-forward regressor: input_dim -> 256 -> 128 -> 64 -> 1.

    Every hidden layer is Linear -> BatchNorm -> ReLU. The first two hidden
    layers are followed by Dropout, which is applied only when the forward
    pass is called with ``training=True``.

    Args:
        input_dim: Number of input features per sample.
        dropout_prob: Probability passed to both Dropout layers.
    """

    def __init__(self, input_dim: int, dropout_prob: float = 0.2):
        super().__init__()
        # Hidden layer 1: input_dim -> 256
        self.fc1 = nn.Linear(input_dim, 256)
        self.bn1 = nn.BatchNorm(256)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(p=dropout_prob)

        # Hidden layer 2: 256 -> 128
        self.fc2 = nn.Linear(256, 128)
        self.bn2 = nn.BatchNorm(128)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(p=dropout_prob)

        # Hidden layer 3: 128 -> 64 (deliberately no dropout before the output)
        self.fc3 = nn.Linear(128, 64)
        self.bn3 = nn.BatchNorm(64)
        self.relu3 = nn.ReLU()

        # Output layer: a single regression value per sample
        self.fc4 = nn.Linear(64, 1)

    def __call__(self, x, training: bool = False):
        """Run the forward pass and return one prediction per input row.

        Args:
            x: Batch of feature rows whose last dimension is ``input_dim``.
            training: When true, the two Dropout layers are applied; when
                false they are skipped entirely.

        NOTE(review): ``training`` only gates dropout in this method. The
        BatchNorm layers are presumably controlled by the module's
        ``train()`` / ``eval()`` mode instead — confirm against the MLX docs
        and keep the two in sync at call sites.
        """
        x = self.relu1(self.bn1(self.fc1(x)))
        if training:
            x = self.dropout1(x)

        x = self.relu2(self.bn2(self.fc2(x)))
        if training:
            x = self.dropout2(x)

        x = self.relu3(self.bn3(self.fc3(x)))
        return self.fc4(x)

# Build the network, sizing its input layer from the width of the feature matrix
dropout_rate = 0.2  # example value; tune as needed
input_dim = X_train_mlx.shape[-1]
mlx_model = BatchNormMLXModel(input_dim=input_dim, dropout_prob=dropout_rate)
# Force evaluation of the freshly created parameters
mx.eval(mlx_model.parameters())

In [None]:
def mse_loss(model, x, y):
    """Return the mean squared error of ``model`` on one batch.

    The forward pass is always made with ``training=True``, so this loss is
    meant for the training step rather than for evaluation.

    Args:
        model: Callable invoked as ``model(x, training=True)``.
        x: Batch of inputs passed to ``model``.
        y: Targets, subtracted element-wise from the predictions.
    """
    residual = model(x, training=True) - y
    return mx.mean(residual ** 2)

# Wrap mse_loss so that a single call yields both the loss value and the
# gradients for mlx_model; the training loop unpacks it as (loss, grads).
loss_and_grad_fn = nn.value_and_grad(mlx_model, mse_loss)

In [None]:
# Training loop for the MLX model: mini-batch AdamW with a cosine-decayed
# learning rate.
epochs = 10
batch_size = 128
num_samples = X_train_mlx.shape[0]  # number of training rows
# Ceiling division: the last batch of an epoch may hold fewer than batch_size rows
num_batches_per_epoch = (num_samples + batch_size - 1) // batch_size
num_batches = num_batches_per_epoch  # loop-invariant; name kept for any later cells
total_steps = epochs * num_batches_per_epoch
initial_lr = 0.001  # starting learning rate
# The schedule spans every optimizer update of the whole run
lr_schedule = optim.cosine_decay(initial_lr, total_steps)
optimizer = optim.AdamW(learning_rate=lr_schedule, weight_decay=0.01)


for epoch in range(epochs):
    # Running sum of (batch mean loss * batch size), i.e. the summed
    # per-sample loss for this epoch
    epoch_loss = 0.0
    mlx_model.train()  # put the module in training mode
    # Fresh shuffle of the row order for every epoch
    permutation = np.random.permutation(num_samples)
    for i in range(0, num_samples, batch_size):
        batch_indices = permutation[i:i + batch_size]
        batch_indices_mlx = mx.array(batch_indices, dtype=mx.int32)
        X_batch = X_train_mlx[batch_indices_mlx]
        y_batch = y_train_mlx[batch_indices_mlx]

        # mse_loss runs the model with training=True, so dropout is applied here
        loss, grads = loss_and_grad_fn(mlx_model, X_batch, y_batch)

        optimizer.update(mlx_model, grads)
        # Force evaluation of the updated parameters and optimizer state
        mx.eval(mlx_model.parameters(), optimizer.state)
        # `loss` is a batch mean; weight it by the batch size so a short final
        # batch is not over-represented in the epoch average
        epoch_loss += loss.item() * len(batch_indices)

    avg_loss = epoch_loss / num_samples
    print(f"Epoch {epoch}, Average Loss: {avg_loss:.4f}")

In [None]:
# Score the trained MLX model on the held-out test split
print("Evaluating MLX model on test set...")
mlx_model.eval()  # switch the module to evaluation mode
# training=False skips the dropout layers in the forward pass
y_pred_test_mlx = mlx_model(X_test_mlx, training=False)
mx.eval(y_pred_test_mlx)

# Drop the singleton output axis and hand the predictions to NumPy
y_pred_test_mlx_np = np.array(mx.squeeze(y_pred_test_mlx))

mlx_mse_test, mlx_r2_test = (
    mean_squared_error(y_test_np, y_pred_test_mlx_np),
    r2_score(y_test_np, y_pred_test_mlx_np),
)

print(f"MLX Test Mean Squared Error (MSE): {mlx_mse_test}")
print(f"MLX Test R² Score: {mlx_r2_test}")