# Flight Passenger Prediction

This notebook demonstrates loading flight passenger data, training an XGBoost model to predict passenger numbers, and visualizing the results.

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import xgboost as xgb
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Apply seaborn's "whitegrid" style globally so every plot created later in
# the notebook picks it up.
sns.set(style="whitegrid")

In [None]:
def load_flight_data(file_path: str) -> pd.DataFrame:
    """Read the flight passenger CSV at ``file_path`` into a DataFrame.

    The file is parsed with pandas' default CSV settings and returned as-is;
    no cleaning or preprocessing is applied at this stage.

    Args:
        file_path: Location of the CSV file holding the flight data.

    Returns:
        The parsed contents of the CSV file.
    """
    # Hook point: insert cleaning / preprocessing steps here if they are needed.
    return pd.read_csv(file_path)

In [None]:
def preprocess_data(df: pd.DataFrame) -> tuple[pd.DataFrame, pd.Series]:
    """Build the model's feature matrix and target from raw flight data.

    The input frame is not modified: the date parsing and the derived
    ``year`` / ``month`` columns live only in the returned feature matrix.

    Args:
        df: Raw flight data with ``date``, ``origin``, ``destination`` and
            ``passengers`` columns.

    Returns:
        Tuple of (features, target). ``features`` holds ``year``, ``month``
        and one indicator column per distinct ``origin`` / ``destination``
        value; ``target`` is the ``passengers`` column.

    Raises:
        KeyError: If any of the required columns is missing from ``df``.
    """
    required = ['date', 'origin', 'destination', 'passengers']
    missing = [name for name in required if name not in df.columns]
    if missing:
        raise KeyError(f"Missing required column(s): {missing}")

    # Parse into a local Series rather than writing back into ``df`` so the
    # caller's frame keeps its original columns and dtypes.
    dates = pd.to_datetime(df['date'])

    # ``assign`` returns a new frame, so the selection from ``df`` is never
    # written to.
    features = df[['origin', 'destination']].assign(
        year=dates.dt.year,
        month=dates.dt.month,
    )[['origin', 'destination', 'year', 'month']]
    target = df['passengers']

    # Convert categoricals to numerical (one indicator column per category)
    features = pd.get_dummies(features, columns=['origin', 'destination'])

    return features, target

In [None]:
def train_xgboost_model(X: pd.DataFrame, y: pd.Series) -> xgb.XGBRegressor:
    """Fit an XGBoost regressor and report its hold-out RMSE.

    The data is split 80/20 with a fixed seed (``random_state=42``); the
    model is fitted on the larger part, and the RMSE measured on the
    remaining 20% is printed.

    Args:
        X: Feature matrix.
        y: Target values aligned with ``X``.

    Returns:
        The fitted regressor (trained on the 80% split only).
    """
    hyperparams = {
        'objective': 'reg:squarederror',
        'n_estimators': 100,
        'max_depth': 5,
        'learning_rate': 0.1,
    }

    train_X, holdout_X, train_y, holdout_y = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    regressor = xgb.XGBRegressor(**hyperparams)
    regressor.fit(train_X, train_y)

    # Score on the rows the model never saw during fitting.
    holdout_mse = mean_squared_error(holdout_y, regressor.predict(holdout_X))
    rmse = np.sqrt(holdout_mse)
    print(f"Model RMSE: {rmse:.2f}")

    return regressor

In [None]:
def visualize_results(df: pd.DataFrame, model: xgb.XGBRegressor):
    """Plot actual-vs-predicted passengers next to the model's feature importance.

    Args:
        df: Original (raw) flight data. It is run through ``preprocess_data``
            here to rebuild the feature matrix; the frame itself is not
            modified.
        model: Trained XGBoost model
    """
    # The model must be given the encoded feature matrix, not the raw frame
    # (which still holds string/date columns and the target). A copy is
    # passed so the caller's frame is never touched.
    features, target = preprocess_data(df.copy())

    # Line the columns up with what the model saw during fitting: categories
    # absent from ``df`` become all-zero indicator columns, and categories
    # the model never saw are dropped. ``feature_names`` may be None (e.g.
    # a model fitted on plain arrays), in which case the frame is used as-is.
    trained_columns = model.get_booster().feature_names
    if trained_columns is not None:
        features = features.reindex(columns=trained_columns, fill_value=0)

    predictions = model.predict(features)

    fig, (ax_scatter, ax_importance) = plt.subplots(1, 2, figsize=(12, 6))

    # 1. Actual vs Predicted
    sns.scatterplot(x=target, y=predictions, ax=ax_scatter)
    ax_scatter.set_title('Actual vs Predicted Passengers')
    ax_scatter.set_xlabel('Actual')
    ax_scatter.set_ylabel('Predicted')

    # 2. Feature Importance
    # Without an explicit ``ax`` plot_importance opens a brand-new figure,
    # which would leave the right-hand panel of this one empty.
    xgb.plot_importance(model, ax=ax_importance)
    ax_importance.set_title('Feature Importance')

    fig.tight_layout()
    plt.show()

In [None]:
# Main execution: run the full pipeline (load -> preprocess -> train -> plot).
# The guard keeps the pipeline from running when this code is imported as a
# module rather than executed directly.
if __name__ == "__main__":
    # Load data from the CSV expected in the current working directory
    df = load_flight_data("flight_data.csv")
    
    # Preprocess: build the feature matrix X and the target y
    X, y = preprocess_data(df)
    
    # Train model (also prints the RMSE measured on the hold-out split)
    model = train_xgboost_model(X, y)
    
    # Visualize results
    visualize_results(df, model)

In [None]:
# Export this notebook to HTML (the command expects the notebook file to be named test_enac.ipynb)
!jupyter nbconvert --to html test_enac.ipynb