# Flight Delay Prediction

## Data Loading and Preprocessing

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the parquet file read later in this
# notebook lives under that mount point.
drive.mount('/content/drive')

In [None]:
import pandas as pd

In [None]:
# Load the cleaned flight data from the mounted Google Drive into a DataFrame.
# The path is hard-coded, so the Drive mount above must have succeeded first.
flights_df = pd.read_parquet("/content/drive/MyDrive/cleaned_flights.parquet")

In [None]:
# Derive 'DEPARTURE_HOUR' from the 'DATE' column via the pandas `.dt`
# accessor, which requires 'DATE' to hold datetime-like values.
# NOTE(review): presumably 'DATE' carries the departure timestamp — confirm;
# a date-only column would yield hour 0 for every row.
flights_df['DEPARTURE_HOUR'] = flights_df['DATE'].dt.hour

In [None]:
# Replace missing values in 'DAILY_SNOWFALL' with 0 so the column contains no
# NaN when it is later used as a numeric feature.
# NOTE(review): assumes a missing reading means "no snowfall" — confirm.
flights_df['DAILY_SNOWFALL'] = flights_df['DAILY_SNOWFALL'].fillna(0)

In [None]:
# Keep only rows whose 'ARRIVAL_DELAY' is strictly positive; rows with a
# delay <= 0 or a missing delay are dropped. `.copy()` makes the result an
# independent frame, so the in-place column edits applied to delayed_flights
# later do not touch flights_df.
delayed_flights = flights_df[flights_df['ARRIVAL_DELAY'] > 0].copy()

## JAX Implementation

In [None]:
import jax
import jax.numpy as jnp
from jax import random, grad, jit, value_and_grad
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# Standardize numeric columns (zero mean, unit variance) for the JAX model.
# NOTE: the statistics are computed over every row of delayed_flights, i.e.
# before the train/test split performed later in this notebook.
for col in ['MONTH', 'DEPARTURE_HOUR', 'DAY_OF_WEEK', 'DISTANCE']:
    col_mean = delayed_flights[col].mean()
    col_std = delayed_flights[col].std()
    # A constant (or single-row) column has a std of 0 (or NaN); dividing by
    # it would fill the feature with NaN/inf, so such a column is only centred.
    if not col_std > 0:
        col_std = 1.0
    delayed_flights[col] = (delayed_flights[col] - col_mean) / col_std

In [None]:
# Build the feature matrix X and target vector y for the JAX model.
# The numeric block is converted with a plain NumPy dtype name: this step is
# handled entirely by pandas/NumPy, so no JAX scalar type is needed here.
numeric_feats = delayed_flights[['MONTH', 'DEPARTURE_HOUR', 'DAY_OF_WEEK', 'DISTANCE', 'DAILY_SNOWFALL']].astype('float32').values
# One-hot encode the categorical columns. float32 is requested explicitly so
# the result does not depend on pandas' default dummy dtype (bool or uint8,
# depending on the pandas version).
categorical_feats = pd.get_dummies(
    delayed_flights[['AIRLINE', 'origin_airport/AIRPORT', 'destination_airport/AIRPORT']],
    dtype='float32',
).values
# Numeric features come first, followed by the one-hot columns.
X = jnp.hstack([numeric_feats, categorical_feats])
y = delayed_flights['ARRIVAL_DELAY'].values

In [None]:
# Select the devices JAX will report: prefer a TPU backend when one is
# attached, otherwise fall back to JAX's default backend so the notebook still
# runs on GPU/CPU runtimes instead of stopping here.
try:
    jax_devices = jax.devices("tpu")
except RuntimeError:
    # jax.devices raises RuntimeError when the requested backend is unavailable.
    jax_devices = jax.devices()
print(f"Using device: {jax_devices[0]}")

In [None]:
# Materialize features and targets as float32 JAX arrays; the targets are
# reshaped into a single-column matrix so they line up with the network output.
X_jax = jnp.array(X, dtype=jnp.float32)
y_jax = jnp.reshape(jnp.array(y, dtype=jnp.float32), (-1, 1))

In [None]:
# Hold out the trailing 20% of rows for evaluation. Rows are taken in their
# existing order; no shuffling is applied.
train_size = int(0.8 * X_jax.shape[0])
test_size = X_jax.shape[0] - train_size

X_train, y_train = X_jax[:train_size], y_jax[:train_size]
X_test, y_test = X_jax[train_size:], y_jax[train_size:]

# Number of samples per mini-batch during training
batch_size = 32

# Function to create batches
def create_batches(X, y, batch_size, drop_remainder=True):
    """Yield consecutive ``(X_batch, y_batch)`` slices of ``batch_size`` rows.

    Batches are taken in order, without shuffling. The number of rows is taken
    from ``len(X)``; ``y`` is sliced with the same indices.

    Args:
        X: Sliceable sequence/array of inputs.
        y: Sliceable sequence/array of targets aligned with ``X``.
        batch_size: Number of rows per batch; must be positive.
        drop_remainder: When True (the default, matching the historical
            behaviour) a trailing partial batch is skipped so every batch has
            the same length, which keeps the input shape constant for
            jit-compiled consumers. When False the shorter final batch is
            yielded as well, so no sample is left out.

    Yields:
        Tuples ``(X[start:stop], y[start:stop])``.

    Raises:
        ValueError: If ``batch_size`` is not positive. Because this is a
            generator, the error surfaces when iteration starts.
    """
    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive, got {batch_size}")

    n_samples = len(X)
    # Either stop at the last full batch or run through to the final row.
    stop = n_samples - n_samples % batch_size if drop_remainder else n_samples
    for start in range(0, stop, batch_size):
        yield X[start:start + batch_size], y[start:start + batch_size]

In [None]:
# Define Feedforward Neural Network in JAX
class FeedForwardNN:
    """Fully connected network with two ReLU hidden layers and a linear output.

    Each weight matrix is drawn from a normal distribution and scaled by
    ``sqrt(2 / fan_in)``; the three layers use the fixed PRNG seeds 0, 1 and 2,
    so every instance with the same sizes starts from identical weights.
    Biases start at zero.
    """

    def __init__(self, input_size, hidden_size1, hidden_size2, output_size):
        self.input_size = input_size
        self.hidden_size1 = hidden_size1
        self.hidden_size2 = hidden_size2
        self.output_size = output_size

        # One (weights, bias) pair per layer, seeded 0, 1, 2 in layer order
        self.W1, self.b1 = self._make_layer(0, input_size, hidden_size1)
        self.W2, self.b2 = self._make_layer(1, hidden_size1, hidden_size2)
        self.W3, self.b3 = self._make_layer(2, hidden_size2, output_size)

    @staticmethod
    def _make_layer(seed, fan_in, fan_out):
        """Return scaled-normal weights of shape (fan_in, fan_out) and a zero bias."""
        weights = random.normal(random.PRNGKey(seed), (fan_in, fan_out)) * jnp.sqrt(2.0 / fan_in)
        return weights, jnp.zeros(fan_out)

    def forward(self, X):
        """Run ``X`` through both ReLU hidden layers and the linear output layer."""
        activations = X
        for weights, bias in ((self.W1, self.b1), (self.W2, self.b2)):
            activations = jax.nn.relu(jnp.dot(activations, weights) + bias)
        # The output layer has no activation (regression output)
        return jnp.dot(activations, self.W3) + self.b3

In [None]:
# Network dimensions for the class-based model, followed by its instantiation
input_size = X.shape[1]             # one input unit per feature column
hidden_size1, hidden_size2 = 64, 32  # example widths of the two hidden layers
output_size = 1                     # single regression output (arrival delay)

model = FeedForwardNN(
    input_size=input_size,
    hidden_size1=hidden_size1,
    hidden_size2=hidden_size2,
    output_size=output_size,
)

In [None]:
# Loss function
def mse_loss(params, X, y):
    """Return the mean squared error between the network output and ``y``.

    The forward pass is evaluated directly from ``params`` (ReLU after every
    layer except the last), so no throwaway model object with freshly sampled
    weights is built on each call, and any number of layers is supported.

    Args:
        params: Non-empty sequence of ``(W, b)`` pairs, one per layer, in the
            layout produced by ``init_nn_params``.
        X: Input batch whose last dimension matches the first dimension of
            the first weight matrix.
        y: Targets. They are subtracted from the predictions with ordinary
            broadcasting, so they should have the same shape as the network
            output (e.g. ``(batch, 1)``); a 1-D ``y`` against a ``(batch, 1)``
            output would broadcast to ``(batch, batch)``.

    Returns:
        Scalar array holding the mean of the squared differences.
    """
    *hidden_layers, (W_out, b_out) = params

    activations = X
    for W, b in hidden_layers:
        activations = jax.nn.relu(jnp.dot(activations, W) + b)

    # Linear output layer: no activation for regression
    preds = jnp.dot(activations, W_out) + b_out
    return jnp.mean((preds - y) ** 2)

# Function to initialize NN parameters
def init_nn_params(layer_sizes, rng_key):
    """Create the initial ``(W, b)`` pair for every layer of a dense network.

    ``layer_sizes`` lists the unit counts from input to output; consecutive
    entries define one layer each. ``rng_key`` is split into one sub-key per
    layer. Weights are normal samples scaled by ``sqrt(2 / fan_in)`` and
    biases are zero vectors.

    Returns:
        List of ``(W, b)`` tuples ordered from the first layer to the last.
    """
    fan_ins, fan_outs = layer_sizes[:-1], layer_sizes[1:]
    layer_keys = random.split(rng_key, len(layer_sizes) - 1)

    def he_layer(fan_in, fan_out, layer_key):
        weights = random.normal(layer_key, (fan_in, fan_out)) * jnp.sqrt(2.0 / fan_in)
        return weights, jnp.zeros(fan_out)

    return [he_layer(*spec) for spec in zip(fan_ins, fan_outs, layer_keys)]

# Update predict function to use forward method
def predict(params, X):
    """Return the network output for ``X`` given the layer parameters.

    The forward pass is computed straight from ``params`` (ReLU after every
    layer except the last) instead of building a model object whose randomly
    initialised weights would be overwritten immediately. Any number of
    layers is supported.

    Args:
        params: Non-empty sequence of ``(W, b)`` pairs, one per layer, in the
            layout produced by ``init_nn_params``.
        X: Input batch whose last dimension matches the first dimension of
            the first weight matrix.

    Returns:
        Array of predictions from the final linear layer.
    """
    *hidden_layers, (W_out, b_out) = params

    activations = X
    for W, b in hidden_layers:
        activations = jax.nn.relu(jnp.dot(activations, W) + b)

    # Linear output layer: no activation for regression
    return jnp.dot(activations, W_out) + b_out

# Define the update step (train_step)
@jit
def train_step(params, X, y, learning_rate):
    """Perform one plain gradient-descent step on ``mse_loss``.

    The gradients of ``mse_loss`` with respect to ``params`` are evaluated on
    the batch ``(X, y)`` and each weight and bias is moved against its
    gradient by ``learning_rate``. The loss value computed alongside the
    gradients is discarded.

    Returns:
        List of updated ``(W, b)`` tuples in the same layer order as ``params``.
    """
    _, grads = value_and_grad(mse_loss)(params, X, y)
    return [
        (weights - learning_rate * d_weights, bias - learning_rate * d_bias)
        for (weights, bias), (d_weights, d_bias) in zip(params, grads)
    ]

In [None]:
# Training setup: parameters for a network with hidden widths 128 and 64
rng_key = random.PRNGKey(0)
layer_sizes = [X_train.shape[1], 128, 64, 1]
params = init_nn_params(layer_sizes, rng_key)
num_epochs, learning_rate = 20, 1e-5

# Mini-batch gradient descent over the training split
for epoch in range(num_epochs):
    for X_batch, y_batch in create_batches(X_train, y_train, batch_size):
        params = train_step(params, X_batch, y_batch, learning_rate)

    # Report the loss on the first 10000 training rows only, which keeps the
    # memory needed for monitoring small
    train_loss = mse_loss(params, X_train[:10000], y_train[:10000])
    print(f"Epoch {epoch + 1}, Training Loss: {train_loss:.4f}")

In [None]:
# Compile the predict function using jit
@jit
def predict(params, X):
    """Return the network output for ``X`` (jit-compiled).

    The forward pass is computed straight from ``params`` (ReLU after every
    layer except the last) rather than by constructing a model object and
    overwriting its freshly sampled weights. Any number of layers is
    supported.

    Args:
        params: Non-empty sequence of ``(W, b)`` pairs, one per layer, in the
            layout produced by ``init_nn_params``.
        X: Input batch whose last dimension matches the first dimension of
            the first weight matrix.

    Returns:
        Array of predictions from the final linear layer.
    """
    *hidden_layers, (W_out, b_out) = params

    activations = X
    for W, b in hidden_layers:
        activations = jax.nn.relu(jnp.dot(activations, W) + b)

    # Linear output layer: no activation for regression
    return jnp.dot(activations, W_out) + b_out


# Evaluate the trained parameters on the held-out test split
y_pred = predict(params, X_test)
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
# mse is a host-side scalar returned by scikit-learn, so the root is taken in
# plain Python; no JAX op (and no NumPy import) is needed for it.
rmse = mse ** 0.5

print(f"Test R-squared (R2): {r2:.4f}")
print(f"Test Mean Squared Error (MSE): {mse:.4f}")
print(f"Test Root Mean Squared Error (RMSE): {rmse:.4f}")