# Flight Delay Prediction - GPU Accelerated Models

This notebook trains multiple ML models (Decision Tree, Random Forest, Gradient Boosting) to predict flight delays (15+ minutes) using airline data with **GPU/TPU acceleration**.

## 🚀 Features
- **Target**: 15+ minute delays (17.8% positive class)
- **Features**: 63 engineered features from flight data
- **Samples**: 400K training, 100K test
- **Models**: Decision Tree, Random Forest, Gradient Boosting
- **GPU/TPU**: Automatic detection and optimization
- **Performance**: Comprehensive model comparison


In [None]:
# Install required packages
%pip install -q torch tensorflow xgboost lightgbm

print("‚úÖ All packages installed successfully!")


In [None]:
# Import all required libraries
import pandas as pd
import numpy as np
import joblib
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import (
    classification_report, confusion_matrix, accuracy_score,
    precision_score, recall_score, f1_score, roc_auc_score, roc_curve
)
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import GridSearchCV
import warnings
warnings.filterwarnings('ignore')

# GPU/TPU detection and setup
import os
import torch
import tensorflow as tf

# Set style for better plots.
# Keep this order: the matplotlib style is applied first and the seaborn palette
# second; applying the 'default' style afterwards would presumably reset the
# colour cycle chosen by the palette — confirm before reordering.
plt.style.use('default')
sns.set_palette("husl")

# Status message (emoji repaired from its mis-encoded form).
print("✅ Libraries imported successfully!")


In [None]:
# GPU/TPU Detection and Setup
def setup_gpu_acceleration():
    """
    Detect the best available accelerator and report it on stdout.

    Probes, in order: a TPU (through TensorFlow), a CUDA GPU (through PyTorch),
    a GPU visible to TensorFlow, and finally falls back to the CPU.

    Returns:
        tuple: ``(device_type, device_info)``. ``device_type`` is one of
        ``'tpu'``, ``'gpu'`` or ``'cpu'``. ``device_info`` is the initialised
        TPU strategy object when ``device_type`` is ``'tpu'``, the number of
        GPUs when it is ``'gpu'``, and ``1`` when it is ``'cpu'``.
    """
    print("=== GPU/TPU ACCELERATION SETUP ===")

    # Check for TPU
    try:
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
        print('🚀 Running on TPU:', tpu.master())
        tf.config.experimental_connect_to_cluster(tpu)
        tf.tpu.experimental.initialize_tpu_system(tpu)
        # Prefer the stable class; fall back to the deprecated experimental
        # alias only when the installed TensorFlow does not expose it.
        strategy_cls = getattr(tf.distribute, 'TPUStrategy', None)
        if strategy_cls is None:
            strategy_cls = tf.distribute.experimental.TPUStrategy
        tpu_strategy = strategy_cls(tpu)
        print(f'TPU devices: {tpu_strategy.num_replicas_in_sync}')
        return 'tpu', tpu_strategy
    except Exception as exc:
        # Catch `Exception` rather than using a bare `except` so that
        # KeyboardInterrupt / SystemExit still propagate to the kernel.
        print('❌ TPU not available')
        # Surface the cause so a half-initialised TPU is not silently
        # mistaken for "no TPU present".
        print(f'   ({type(exc).__name__}: {exc})')

    # Check for GPU
    if torch.cuda.is_available():
        print(f'🚀 GPU available: {torch.cuda.get_device_name(0)}')
        print(f'GPU count: {torch.cuda.device_count()}')
        print(f'Current GPU: {torch.cuda.current_device()}')
        return 'gpu', torch.cuda.device_count()

    # Check TensorFlow GPU (query the device list once and reuse it)
    gpus = tf.config.list_physical_devices('GPU')
    if gpus:
        print(f'🚀 TensorFlow GPU devices: {len(gpus)}')
        for gpu in gpus:
            print(f'  {gpu}')
        return 'gpu', len(gpus)

    print('⚠️  No GPU/TPU available, using CPU')
    return 'cpu', 1

# Initialize acceleration
device_type, device_count = setup_gpu_acceleration()

# On TPU the second element is a strategy object rather than an integer, so
# derive a printable device count from it; for GPU/CPU it is already a number.
# `device_count` itself is left untouched for any later cells that use it.
n_devices = getattr(device_count, 'num_replicas_in_sync', device_count)
print(f"\n🎯 Using: {device_type.upper()} with {n_devices} device(s)")


In [None]:
# Load preprocessed data
print("📊 Loading preprocessed data...")

# Directory holding the preprocessed .npy arrays — adjust this single value to
# match your dataset name.
DATA_DIR = '/kaggle/input/flight-delay-data'

# Load the numpy files
X_train = np.load(os.path.join(DATA_DIR, 'X_train.npy'))
X_test = np.load(os.path.join(DATA_DIR, 'X_test.npy'))
y_train = np.load(os.path.join(DATA_DIR, 'y_train.npy'))
y_test = np.load(os.path.join(DATA_DIR, 'y_test.npy'))

# Fail fast if the arrays do not line up, instead of erroring deep inside training.
assert len(X_train) == len(y_train), "X_train and y_train have different lengths"
assert len(X_test) == len(y_test), "X_test and y_test have different lengths"
assert X_train.shape[1:] == X_test.shape[1:], "train/test feature dimensions differ"

print("✅ Data loaded successfully!")
print("📈 Data shapes:")
print(f"  X_train: {X_train.shape}")
print(f"  X_test: {X_test.shape}")
print(f"  y_train: {y_train.shape}")
print(f"  y_test: {y_test.shape}")
print(f"  Positive class rate: {y_train.mean():.3f}")
print(f"  Total samples: {len(X_train) + len(X_test):,}")
