In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

# Set random seed for reproducibility
np.random.seed(42)

def create_airline_financial_dataset():
    """Create comprehensive airline financial performance dataset"""
    
    # Generate dates for the past 3 years
    dates = pd.date_range(start='2021-01-01', end='2023-12-31', freq='M')
    n_periods = len(dates)
    
    # Regions and aircraft types
    regions = ['Middle East', 'Europe', 'Asia', 'Australia', 'North America']
    aircraft_types = ['Boeing 787', 'Airbus A380', 'Boeing 777', 'Airbus A350', 'Boeing 737']
    
    data = []
    
    for date in dates:
        for region in regions:
            for aircraft in aircraft_types:
                # Base metrics with some randomness
                passengers = np.random.randint(5000, 50000)
                revenue = passengers * np.random.uniform(300, 800)
                operating_costs = revenue * np.random.uniform(0.6, 0.85)
                fuel_costs = operating_costs * np.random.uniform(0.25, 0.35)
                maintenance_costs = operating_costs * np.random.uniform(0.15, 0.25)
                staff_costs = operating_costs * np.random.uniform(0.20, 0.30)
                
                # Calculate derived metrics
                gross_profit = revenue - operating_costs
                load_factor = np.random.uniform(0.65, 0.95)
                yield_per_rpm = revenue / (passengers * np.random.uniform(1000, 5000))
                
                data.append({
                    'Date': date,
                    'Region': region,
                    'Aircraft_Type': aircraft,
                    'Passengers': passengers,
                    'Revenue_USD': round(revenue, 2),
                    'Operating_Costs_USD': round(operating_costs, 2),
                    'Fuel_Costs_USD': round(fuel_costs, 2),
                    'Maintenance_Costs_USD': round(maintenance_costs, 2),
                    'Staff_Costs_USD': round(staff_costs, 2),
                    'Gross_Profit_USD': round(gross_profit, 2),
                    'Load_Factor': round(load_factor, 3),
                    'Yield_per_RPM': round(yield_per_rpm, 4),
                    'Available_Seat_Miles': passengers * np.random.randint(1000, 5000),
                    'Revenue_Passenger_Miles': passengers * np.random.randint(800, 4500)
                })
    
    df = pd.DataFrame(data)
    return df

# Generate the dataset
financial_data = create_airline_financial_dataset()
financial_data.to_csv('airline_financial_performance.csv', index=False)
print("Financial dataset created with shape:", financial_data.shape)

Financial dataset created with shape: (900, 14)


  dates = pd.date_range(start='2021-01-01', end='2023-12-31', freq='M')
