# In [7]:
import requests
import os
import json
import time
from dotenv import load_dotenv
import pandas as pd
from datetime import datetime, timedelta
import pickle
from functools import lru_cache

# ===== Configuration =====
# On-disk response cache.
CACHE_FILE = "flight_data_cache.pkl"  # Pickle file the cache is stored in
CACHE_EXPIRY = 60 * 60 * 24           # Lifetime of a cache entry in seconds (24 hours)

# Weights for blending historical and recent data (adjust as needed).
HISTORICAL_WEIGHT = 0.6
RECENT_WEIGHT = 0.4

# ===== API Interface Layer =====
class FlightDataAPI:
    """Class to handle all API interactions with AeroDataBox."""
    
    def __init__(self, api_key=None):
        """Initialize the API client with authentication."""
        # Load API key from environment or parameter
        if api_key is None:
            load_dotenv()
            api_key = os.getenv("RAPIDAPI_KEY")
            
        if not api_key:
            raise ValueError("API key not provided and not found in environment")
            
        self.headers = {
            "x-rapidapi-key": api_key,
            "x-rapidapi-host": "aerodatabox.p.rapidapi.com"
        }
        self.base_url = "https://aerodatabox.p.rapidapi.com"
        
        # Initialize cache
        self.cache = self._load_cache()
    
    def _load_cache(self):
        """Load the cache from disk if it exists."""
        try:
            if os.path.exists(CACHE_FILE):
                with open(CACHE_FILE, "rb") as f:
                    cached_data = pickle.load(f)
                    # Filter out expired cache entries
                    now = time.time()
                    filtered_cache = {
                        key: (data, timestamp) 
                        for key, (data, timestamp) in cached_data.items() 
                        if now - timestamp < CACHE_EXPIRY
                    }
                    return filtered_cache
        except Exception as e:
            print(f"Error loading cache: {e}")
        return {}
    
    def _save_cache(self):
        """Save the cache to disk."""
        try:
            with open(CACHE_FILE, "wb") as f:
                pickle.dump(self.cache, f)
        except Exception as e:
            print(f"Error saving cache: {e}")
    
    def _get_cached_or_fetch(self, cache_key, fetch_func):
        """Get data from cache if available, otherwise fetch from API."""
        now = time.time()
        
        if cache_key in self.cache:
            data, timestamp = self.cache[cache_key]
            if now - timestamp < CACHE_EXPIRY:
                cache_age = round((now - timestamp) / 60)
                print(f"✓ Using cached data for {cache_key} (cached {cache_age} minutes ago)")
                return data
            else:
                print(f"✗ Cache expired for {cache_key}, fetching fresh data...")
        else:
            print(f"✗ No cache found for {cache_key}, fetching from API...")
        
        # Fetch fresh data
        data = fetch_func()
        if data:
            self.cache[cache_key] = (data, now)
            self._save_cache()
            print(f"✓ Saved new data to cache for {cache_key}")
        return data
    
    def get_historical_delay_stats(self, flight_number):
        """Fetch historical delay statistics for a flight number."""
        cache_key = f"historical_{flight_number}"
        
        def fetch_func():
            url = f"{self.base_url}/flights/{flight_number}/delays"
            try:
                response = requests.get(url, headers=self.headers, timeout=15)
                
                # Handle 204 No Content specifically
                if response.status_code == 204:
                    print(f"  ⚠️ No historical data available for {flight_number} (API returned 204 No Content)")
                    return None
                
                response.raise_for_status()
                print(f"  Successfully fetched historical data for {flight_number} from API")
                return response.json()
            except requests.exceptions.HTTPError as http_err:
                print(f"  ⚠️ HTTP error fetching historical data for {flight_number}: {http_err}")
                return None
            except json.JSONDecodeError:
                print(f"  ⚠️ Could not parse API response for {flight_number} (empty or invalid JSON)")
                return None
            except Exception as e:
                print(f"  ⚠️ Error fetching historical data for {flight_number}: {e}")
                return None
        
        return self._get_cached_or_fetch(cache_key, fetch_func)
    
    def get_recent_flights(self, flight_number, days_back=7):
        """Fetch recent flight data for the past days."""
        end_date = datetime.now()
        start_date = end_date - timedelta(days=days_back)
        
        start_str = start_date.strftime("%Y-%m-%d")
        end_str = end_date.strftime("%Y-%m-%d")
        cache_key = f"recent_{flight_number}_{start_str}_{end_str}"
        
        def fetch_func():
            url = f"{self.base_url}/flights/number/{flight_number}/{start_str}/{end_str}?dateLocalRole=Both"
            try:
                response = requests.get(url, headers=self.headers, timeout=15)
                
                # Handle 204 No Content specifically
                if response.status_code == 204:
                    print(f"  ⚠️ No recent flights found for {flight_number} ({start_str} to {end_str}) (API returned 204 No Content)")
                    return None
                
                response.raise_for_status()
                
                # Check if we got an empty array
                data = response.json()
                if isinstance(data, list) and len(data) == 0:
                    print(f"  ⚠️ No recent flights found for {flight_number} ({start_str} to {end_str}) (empty array returned)")
                    return None
                
                print(f"  Successfully fetched recent data for {flight_number} ({start_str} to {end_str}) from API")
                return data
            except requests.exceptions.HTTPError as http_err:
                print(f"  ⚠️ HTTP error fetching recent data for {flight_number}: {http_err}")
                return None
            except json.JSONDecodeError:
                print(f"  ⚠️ Could not parse API response for {flight_number} (empty or invalid JSON)")
                return None
            except Exception as e:
                print(f"  ⚠️ Error fetching recent data for {flight_number}: {e}")
                return None
        
        return self._get_cached_or_fetch(cache_key, fetch_func)
    
    def batch_fetch_flights(self, flight_list):
        """Fetch data for multiple flights with explicit cache status."""
        results = {}
        
        print(f"\n===== Processing {len(flight_list)} flights =====")
        
        # Process each flight sequentially to avoid interleaved log messages
        for flight in flight_list:
            flight_number = flight["flight_number"]
            print(f"\n--- Flight: {flight_number} ({flight.get('airline', 'Unknown')}) ---")
            
            # Get historical data
            print(f"Historical data for {flight_number}:")
            historical_data = self.get_historical_delay_stats(flight_number)
            
            # Get recent data
            print(f"Recent data for {flight_number}:")
            recent_data = self.get_recent_flights(flight_number)
            
            # Store results
            results[flight_number] = {
                "historical": historical_data,
                "recent": recent_data
            }
        
        return results

# ===== Data Processing Layer =====
class FlightDataProcessor:
    """Process raw API responses into structured data."""
    
    @staticmethod
    def process_historical_delay_stats(flight_data):
        """
        Process historical delay statistics from API response,
        prioritizing arrival (destination) delays when available.
        """
        if not flight_data:
            return None
            
        # Extract flight number
        flight_number = flight_data.get('number')
        
        # Initialize results dictionary
        results = {
            'flight_number': flight_number,
            'departure_options': [],
            'arrival_options': [],
            'overall': {}
        }
        
        # Process departure data (origins)
        departure_total_flights = 0
        departure_all_from_dates = []
        departure_all_to_dates = []
        departure_weighted_delay_sum = 0
        
        for origin in flight_data.get('origins', []):
            airport = origin.get('airportIcao')
            hour = origin.get('scheduledHourUtc')
            flights_analyzed = origin.get('numConsideredFlights', 0)
            departure_total_flights += flights_analyzed
            
            # Extract date range
            from_date = origin.get('fromUtc')
            to_date = origin.get('toUtc')
            if from_date:
                departure_all_from_dates.append(from_date)
            if to_date:
                departure_all_to_dates.append(to_date)
            
            # Calculate delay percentages
            on_time_percentage = 0
            slight_delay = 0   # 15-30 minutes
            moderate_delay = 0  # 30-60 minutes
            severe_delay = 0   # > 60 minutes
            
            for bracket in origin.get('numFlightsDelayedBrackets', []):
                delayed_from = bracket.get('delayedFrom')
                delayed_to = bracket.get('delayedTo')
                percentage = bracket.get('percentage', 0)
                
                # On-time flights (-15 min to +15 min)
                if delayed_from == '-00:15:00' and delayed_to == '00:15:00':
                    on_time_percentage = percentage * 100
                # Slight delay (15-30 min)
                elif delayed_from == '00:15:00' and delayed_to == '00:30:00':
                    slight_delay = percentage * 100
                # Moderate delay (30-60 min)
                elif delayed_from == '00:30:00' and delayed_to == '01:00:00':
                    moderate_delay = percentage * 100
                # Severe delay (60+ min)
                elif delayed_from == '01:00:00' and delayed_to == '02:00:00':
                    severe_delay += percentage * 100
                # Extreme delay (2+ hours)
                elif delayed_from == '02:00:00':
                    severe_delay += percentage * 100
            
            # Calculate total delayed percentage (flights delayed > 15 min)
            delayed_percentage = 100 - on_time_percentage
            departure_weighted_delay_sum += delayed_percentage * flights_analyzed
            
            # Get the median delay and 90th percentile
            median_delay = origin.get('medianDelay', 'Unknown')
            percentile_90 = next((p.get('delay') for p in origin.get('delayPercentiles', []) 
                                 if p.get('percentile') == 90), 'Unknown')
            
            # Add to results
            results['departure_options'].append({
                'airport': airport,
                'hour_utc': hour,
                'flights_analyzed': flights_analyzed,
                'date_range': f"{from_date} to {to_date}",
                'delayed_percentage': round(delayed_percentage, 1),
                'on_time_percentage': round(on_time_percentage, 1),
                'delay_buckets': {
                    'slight_delay_15_30min': round(slight_delay, 1),
                    'moderate_delay_30_60min': round(moderate_delay, 1),
                    'severe_delay_60min_plus': round(severe_delay, 1)
                },
                'median_delay': median_delay,
                '90th_percentile_delay': percentile_90
            })
        
        # Process arrival data (destinations)
        arrival_total_flights = 0
        arrival_all_from_dates = []
        arrival_all_to_dates = []
        arrival_weighted_delay_sum = 0
        
        for destination in flight_data.get('destinations', []):
            airport = destination.get('airportIcao')
            hour = destination.get('scheduledHourUtc')
            flights_analyzed = destination.get('numConsideredFlights', 0)
            arrival_total_flights += flights_analyzed
            
            # Extract date range
            from_date = destination.get('fromUtc')
            to_date = destination.get('toUtc')
            if from_date:
                arrival_all_from_dates.append(from_date)
            if to_date:
                arrival_all_to_dates.append(to_date)
            
            # Calculate delay percentages
            on_time_percentage = 0
            slight_delay = 0   # 15-30 minutes
            moderate_delay = 0  # 30-60 minutes
            severe_delay = 0   # > 60 minutes
            
            for bracket in destination.get('numFlightsDelayedBrackets', []):
                delayed_from = bracket.get('delayedFrom')
                delayed_to = bracket.get('delayedTo')
                percentage = bracket.get('percentage', 0)
                
                # On-time flights (-15 min to +15 min)
                if delayed_from == '-00:15:00' and delayed_to == '00:15:00':
                    on_time_percentage = percentage * 100
                # Slight delay (15-30 min)
                elif delayed_from == '00:15:00' and delayed_to == '00:30:00':
                    slight_delay = percentage * 100
                # Moderate delay (30-60 min)
                elif delayed_from == '00:30:00' and delayed_to == '01:00:00':
                    moderate_delay = percentage * 100
                # Severe delay (60+ min)
                elif delayed_from == '01:00:00' and delayed_to == '02:00:00':
                    severe_delay += percentage * 100
                # Extreme delay (2+ hours)
                elif delayed_from == '02:00:00':
                    severe_delay += percentage * 100
            
            # Calculate total delayed percentage (flights delayed > 15 min)
            delayed_percentage = 100 - on_time_percentage
            arrival_weighted_delay_sum += delayed_percentage * flights_analyzed
            
            # Get the median delay and 90th percentile
            median_delay = destination.get('medianDelay', 'Unknown')
            percentile_90 = next((p.get('delay') for p in destination.get('delayPercentiles', []) 
                                 if p.get('percentile') == 90), 'Unknown')
            
            # Add to results
            results['arrival_options'].append({
                'airport': airport,
                'hour_utc': hour,
                'flights_analyzed': flights_analyzed,
                'date_range': f"{from_date} to {to_date}",
                'delayed_percentage': round(delayed_percentage, 1),
                'on_time_percentage': round(on_time_percentage, 1),
                'delay_buckets': {
                    'slight_delay_15_30min': round(slight_delay, 1),
                    'moderate_delay_30_60min': round(moderate_delay, 1),
                    'severe_delay_60min_plus': round(severe_delay, 1)
                },
                'median_delay': median_delay,
                '90th_percentile_delay': percentile_90
            })
        
        # Calculate overall statistics - PRIORITIZE ARRIVAL DATA WHEN AVAILABLE
        has_arrival_data = len(arrival_all_from_dates) > 0
        
        if has_arrival_data:
            # Use arrival data for overall metrics when available
            earliest_date = min(arrival_all_from_dates) if arrival_all_from_dates else "Unknown"
            latest_date = max(arrival_all_to_dates) if arrival_all_to_dates else "Unknown"
            overall_delayed_percentage = (arrival_weighted_delay_sum / arrival_total_flights) if arrival_total_flights > 0 else 0
            
            results['overall'] = {
                'total_flights_analyzed': arrival_total_flights,
                'overall_date_range': f"{earliest_date} to {latest_date}",
                'overall_delayed_percentage': round(overall_delayed_percentage, 1),
                'data_type': 'arrival',  # Indicate we're using arrival data
                'departure_flights_analyzed': departure_total_flights  # Include departure count for reference
            }
        else:
            # Fall back to departure data if no arrival data exists
            earliest_date = min(departure_all_from_dates) if departure_all_from_dates else "Unknown"
            latest_date = max(departure_all_to_dates) if departure_all_to_dates else "Unknown"
            overall_delayed_percentage = (departure_weighted_delay_sum / departure_total_flights) if departure_total_flights > 0 else 0
            
            results['overall'] = {
                'total_flights_analyzed': departure_total_flights,
                'overall_date_range': f"{earliest_date} to {latest_date}",
                'overall_delayed_percentage': round(overall_delayed_percentage, 1),
                'data_type': 'departure'  # Indicate we're using departure data
            }
        
        return results
    
    @staticmethod
    def process_recent_flight_data(flight_data, include_predictions=True):
        """
        Process recent flight data from API response.
        """
        if not flight_data:
            return None
            
        # Extract flight number from first record
        flight_number = flight_data[0].get("number", "Unknown") if flight_data else "Unknown"
        airline_name = flight_data[0].get("airline", {}).get("name", "Unknown") if flight_data else "Unknown"
        
        # Extract route information
        departure_airport = flight_data[0].get("departure", {}).get("airport", {}) if flight_data else {}
        arrival_airport = flight_data[0].get("arrival", {}).get("airport", {}) if flight_data else {}
        route = f"{departure_airport.get('iata', '')} → {arrival_airport.get('iata', '')}"
        
        # Initialize result dictionary
        result = {
            "flight_number": flight_number,
            "airline": airline_name,
            "route": route,
            "total_flights": len(flight_data),
            "date_range": "",
            "individual_flights": [],
            "delay_statistics": {
                "departure": {
                    "average_delay_minutes": 0,
                    "median_delay_minutes": 0,
                    "on_time_percentage": 0,
                    "delayed_percentage": 0,
                    "delay_buckets": {
                        "slight_delay_15_30min": 0,
                        "moderate_delay_30_60min": 0,
                        "severe_delay_60min_plus": 0
                    }
                },
                "arrival": {
                    "average_delay_minutes": 0,
                    "median_delay_minutes": 0,
                    "on_time_percentage": 0,
                    "delayed_percentage": 0,
                    "delay_buckets": {
                        "slight_delay_15_30min": 0,
                        "moderate_delay_30_60min": 0,
                        "severe_delay_60min_plus": 0
                    }
                }
            }
        }
        
        # Calculate date range
        dates = []
        departure_delays = []
        arrival_delays = []
        
        # Process each flight
        for flight in flight_data:
            try:
                # Extract required data using helper functions
                flight_info = FlightDataProcessor._extract_flight_info(flight, include_predictions)
                
                # Add to collections for statistics
                if flight_info['scheduled_date']:
                    dates.append(flight_info['scheduled_date'])
                
                if flight_info['departure_delay_minutes'] is not None:
                    departure_delays.append(flight_info['departure_delay_minutes'])
                
                if flight_info['arrival_delay_minutes'] is not None and (not flight_info['is_arrival_predicted'] or include_predictions):
                    arrival_delays.append(flight_info['arrival_delay_minutes'])
                
                # Add to individual flights list
                result["individual_flights"].append({
                    "date": flight_info['scheduled_date'].strftime("%Y-%m-%d") if flight_info['scheduled_date'] else "Unknown",
                    "status": flight_info['status'],
                    "departure": {
                        "airport": flight_info['departure_airport'],
                        "scheduled": flight_info['departure_scheduled_local'],
                        "actual": flight_info['departure_actual_local'],
                        "delay_minutes": round(flight_info['departure_delay_minutes'], 1) if flight_info['departure_delay_minutes'] is not None else 0,
                        "terminal": flight_info['departure_terminal'],
                        "gate": flight_info['departure_gate']
                    },
                    "arrival": {
                        "airport": flight_info['arrival_airport'],
                        "scheduled": flight_info['arrival_scheduled_local'],
                        "actual": f"{flight_info['arrival_actual_local']}{'(predicted)' if flight_info['is_arrival_predicted'] else ''}",
                        "delay_minutes": round(flight_info['arrival_delay_minutes'], 1) if flight_info['arrival_delay_minutes'] is not None else 0,
                        "terminal": flight_info['arrival_terminal']
                    },
                    "aircraft": flight_info['aircraft_info']
                })
                
            except Exception as e:
                # Skip flights with parsing errors
                print(f"Error processing flight: {e}")
                continue
        
        # Calculate date range
        if dates:
            min_date = min(dates).strftime("%Y-%m-%d")
            max_date = max(dates).strftime("%Y-%m-%d")
            result["date_range"] = f"{min_date} to {max_date}"
        
        # Calculate departure delay statistics
        if departure_delays:
            result["delay_statistics"]["departure"] = FlightDataProcessor._calculate_delay_statistics(departure_delays)
        
        # Calculate arrival delay statistics
        if arrival_delays:
            result["delay_statistics"]["arrival"] = FlightDataProcessor._calculate_delay_statistics(arrival_delays)
        
        return result
    
    @staticmethod
    def _extract_flight_info(flight, include_predictions=True):
        """Extract and format information from a single flight record."""
        # Initialize result dictionary with default values
        result = {
            'scheduled_date': None,
            'status': flight.get('status', 'Unknown'),
            'departure_airport': '',
            'departure_scheduled_local': '',
            'departure_actual_local': '',
            'departure_delay_minutes': None,
            'departure_terminal': '',
            'departure_gate': '',
            'arrival_airport': '',
            'arrival_scheduled_local': '',
            'arrival_actual_local': '',
            'arrival_delay_minutes': None,
            'arrival_terminal': '',
            'is_arrival_predicted': False,
            'aircraft_info': ''
        }
        
        # Helper function to safely get nested values
        def safe_get(data, keys, default=None):
            current = data
            for key in keys:
                if isinstance(current, dict) and key in current:
                    current = current[key]
                else:
                    return default
            return current
        
        # Extract departure information
        departure_info = flight.get('departure', {})
        arrival_info = flight.get('arrival', {})
        
        # Get scheduled departure time
        departure_scheduled = safe_get(flight, ['departure', 'scheduledTime', 'utc'])
        if departure_scheduled:
            scheduled_dt = FlightDataProcessor._parse_time(departure_scheduled)
            if scheduled_dt:
                result['scheduled_date'] = scheduled_dt.date()
        
        # Get actual departure time
        actual_departure = None
        if safe_get(flight, ['departure', 'runwayTime', 'utc']):
            actual_departure = safe_get(flight, ['departure', 'runwayTime', 'utc'])
        elif safe_get(flight, ['departure', 'revisedTime', 'utc']):
            actual_departure = safe_get(flight, ['departure', 'revisedTime', 'utc'])
        
        # Calculate departure delay
        if departure_scheduled and actual_departure:
            scheduled_dt = FlightDataProcessor._parse_time(departure_scheduled)
            actual_dt = FlightDataProcessor._parse_time(actual_departure)
            if scheduled_dt and actual_dt:
                departure_delay = actual_dt - scheduled_dt
                result['departure_delay_minutes'] = departure_delay.total_seconds() / 60
        
        # Extract arrival information
        scheduled_arrival = safe_get(flight, ['arrival', 'scheduledTime', 'utc'])
        
        # Get actual arrival time
        actual_arrival = None
        if safe_get(flight, ['arrival', 'runwayTime', 'utc']):
            actual_arrival = safe_get(flight, ['arrival', 'runwayTime', 'utc'])
        elif safe_get(flight, ['arrival', 'revisedTime', 'utc']):
            actual_arrival = safe_get(flight, ['arrival', 'revisedTime', 'utc'])
        elif safe_get(flight, ['arrival', 'predictedTime', 'utc']):
            actual_arrival = safe_get(flight, ['arrival', 'predictedTime', 'utc'])
            result['is_arrival_predicted'] = True
        
        # Calculate arrival delay
        if scheduled_arrival and actual_arrival:
            scheduled_dt = FlightDataProcessor._parse_time(scheduled_arrival)
            actual_dt = FlightDataProcessor._parse_time(actual_arrival)
            if scheduled_dt and actual_dt:
                arrival_delay = actual_dt - scheduled_dt
                result['arrival_delay_minutes'] = arrival_delay.total_seconds() / 60
        
        # Format airport names
        result['departure_airport'] = f"{safe_get(departure_info, ['airport', 'iata'], '')} ({safe_get(departure_info, ['airport', 'name'], '')})"
        result['arrival_airport'] = f"{safe_get(arrival_info, ['airport', 'iata'], '')} ({safe_get(arrival_info, ['airport', 'name'], '')})"
        
        # Get local time strings
        result['departure_scheduled_local'] = safe_get(flight, ['departure', 'scheduledTime', 'local'], '')
        result['departure_actual_local'] = (
            safe_get(flight, ['departure', 'runwayTime', 'local']) or 
            safe_get(flight, ['departure', 'revisedTime', 'local'], '')
        )
        
        result['arrival_scheduled_local'] = safe_get(flight, ['arrival', 'scheduledTime', 'local'], '')
        result['arrival_actual_local'] = (
            safe_get(flight, ['arrival', 'runwayTime', 'local']) or 
            safe_get(flight, ['arrival', 'revisedTime', 'local']) or
            safe_get(flight, ['arrival', 'predictedTime', 'local'], '')
        )
        
        # Additional info
        result['departure_terminal'] = departure_info.get('terminal', '')
        result['departure_gate'] = departure_info.get('gate', '')
        result['arrival_terminal'] = arrival_info.get('terminal', '')
        
        # Aircraft details
        aircraft_model = safe_get(flight, ['aircraft', 'model'], '')
        aircraft_reg = safe_get(flight, ['aircraft', 'reg'], '')
        result['aircraft_info'] = f"{aircraft_model}{f' ({aircraft_reg})' if aircraft_reg else ''}"
        
        return result
    
    @staticmethod
    def _parse_time(time_str):
        """Parse a time string to a datetime object, handling different formats."""
        if not time_str:
            return None
        
        # Remove Z suffix if present
        if time_str.endswith('Z'):
            time_str = time_str[:-1]
        
        # Try different datetime formats
        formats = [
            "%Y-%m-%d %H:%M",  # 2025-01-01 07:55
            "%Y-%m-%d %H:%M:%S"  # 2025-01-01 07:55:00
        ]
        
        for fmt in formats:
            try:
                return datetime.strptime(time_str, fmt)
            except ValueError:
                continue
        
        return None
    
    @staticmethod
    def _calculate_delay_statistics(delays):
        """Calculate statistics for a list of delays."""
        if not delays:
            return {
                "average_delay_minutes": 0,
                "median_delay_minutes": 0,
                "on_time_percentage": 0,
                "delayed_percentage": 0,
                "delay_buckets": {
                    "slight_delay_15_30min": 0,
                    "moderate_delay_30_60min": 0,
                    "severe_delay_60min_plus": 0
                }
            }
        
        # Sort delays for percentile calculations
        sorted_delays = sorted(delays)
        
        # Average delay
        avg_delay = sum(delays) / len(delays)
        
        # Median delay
        middle = len(sorted_delays) // 2
        if len(sorted_delays) % 2 == 0:
            med_delay = (sorted_delays[middle-1] + sorted_delays[middle]) / 2
        else:
            med_delay = sorted_delays[middle]
        
        # On-time percentage (less than 15 minutes delay)
        on_time_count = sum(1 for delay in delays if delay < 15)
        on_time_pct = on_time_count / len(delays) * 100
        delayed_pct = 100 - on_time_pct
        
        # Delay buckets
        slight_delay = sum(1 for delay in delays if 15 <= delay < 30)
        moderate_delay = sum(1 for delay in delays if 30 <= delay < 60)
        severe_delay = sum(1 for delay in delays if delay >= 60)
        
        slight_delay_pct = slight_delay / len(delays) * 100
        moderate_delay_pct = moderate_delay / len(delays) * 100
        severe_delay_pct = severe_delay / len(delays) * 100
        
        return {
            "average_delay_minutes": round(avg_delay, 1),
            "median_delay_minutes": round(med_delay, 1),
            "on_time_percentage": round(on_time_pct, 1),
            "delayed_percentage": round(delayed_pct, 1),
            "delay_buckets": {
                "slight_delay_15_30min": round(slight_delay_pct, 1),
                "moderate_delay_30_60min": round(moderate_delay_pct, 1),
                "severe_delay_60min_plus": round(severe_delay_pct, 1)
            }
        }

# ===== Analysis Layer =====
class FlightDataAnalyzer:
    """Analyze flight data and calculate combined metrics."""

    @staticmethod
    def _most_recent_option(options):
        """Return the option whose date range ends latest.

        The end date is taken as the text after the last " to " in the
        option's "date_range" string and compared lexicographically. On ties
        the earliest option in the list wins.
        """
        return max(options, key=lambda x: x.get("date_range", "").split(" to ")[-1])

    @staticmethod
    def _raw_score(overall_delay, delay_buckets):
        """Return the unclamped score: on-time percentage minus a severity penalty."""
        slight_delay = delay_buckets.get("slight_delay_15_30min", 0)
        moderate_delay = delay_buckets.get("moderate_delay_30_60min", 0)
        severe_delay = delay_buckets.get("severe_delay_60min_plus", 0)

        on_time_pct = 100 - overall_delay
        # Heavier delays are weighted more strongly in the penalty
        severity_penalty = (slight_delay * 0.5 + moderate_delay * 1.5 + severe_delay * 3) / 100
        return on_time_pct - (severity_penalty * 10)

    @staticmethod
    def _clamp_score(raw_score, cap):
        """Round a raw score and clamp it to the range 0..cap."""
        return min(cap, max(0, round(raw_score)))

    @staticmethod
    def combine_statistics(historical_data, recent_data):
        """
        Combine historical and recent statistics using weighted average.

        When only one source is available, a shallow copy of that source is
        returned with a "data_quality" flag added; the input dict itself is
        left untouched.

        Args:
            historical_data: Processed historical data
            recent_data: Processed recent data

        Returns:
            dict: Combined statistics with data_quality indicator, or None
            when both inputs are empty
        """
        if not historical_data and not recent_data:
            return None

        # Only one source available: return it (copied) with a quality flag
        if not historical_data:
            return {**recent_data, "data_quality": "missing_historical"}
        if not recent_data:
            return {**historical_data, "data_quality": "missing_recent"}

        historical_overall = historical_data.get("overall", {})
        recent_stats = recent_data.get("delay_statistics", {})

        # Check what type of historical data we have (arrival or departure)
        historical_data_type = historical_overall.get("data_type", "departure")

        # Create a new results dictionary for the case where we have both data sources
        combined = {
            "flight_number": historical_data.get("flight_number") or recent_data.get("flight_number"),
            "airline": recent_data.get("airline", "Unknown"),
            "route": recent_data.get("route", "Unknown"),
            "data_quality": "complete",
            "data_sources": {
                "historical": {
                    "total_flights": historical_overall.get("total_flights_analyzed", 0),
                    "date_range": historical_overall.get("overall_date_range", "Unknown"),
                    # Recorded so the report can label the historical source correctly
                    "data_type": historical_data_type,
                },
                "recent": {
                    "total_flights": recent_data.get("total_flights", 0),
                    "date_range": recent_data.get("date_range", "Unknown"),
                }
            },
            "combined_statistics": {},
            "individual_flights": recent_data.get("individual_flights", [])
        }

        # The overall percentage lives under the same key for both data types
        historical_delay = historical_overall.get("overall_delayed_percentage", 0)
        if historical_data_type == "arrival":
            print(f"  Using historical ARRIVAL delays ({historical_delay}%)")
        else:
            print(f"  Using historical DEPARTURE delays ({historical_delay}%)")

        # Extract delay percentages from recent data - always prioritize arrival
        recent_arrival_delay = recent_stats.get("arrival", {}).get("delayed_percentage", 0)

        # If arrival data is missing, use departure as fallback.
        # NOTE(review): a genuine 0% arrival delay rate is indistinguishable from
        # "missing" here and also triggers the fallback - confirm this is intended.
        if recent_arrival_delay == 0 and "departure" in recent_stats:
            recent_overall_delay = recent_stats.get("departure", {}).get("delayed_percentage", 0)
            print(f"  Using recent DEPARTURE delays ({recent_overall_delay}%)")
        else:
            recent_overall_delay = recent_arrival_delay
            print(f"  Using recent ARRIVAL delays ({recent_overall_delay}%)")

        # Apply weights
        weighted_delay = (historical_delay * HISTORICAL_WEIGHT + recent_overall_delay * RECENT_WEIGHT)

        # Choose between arrival and departure options for historical buckets
        if historical_data_type == "arrival" and historical_data.get("arrival_options"):
            print("  Using historical arrival delay buckets")
            options = historical_data.get("arrival_options", [])
        else:
            # Fall back to departure options
            print("  Using historical departure delay buckets")
            options = historical_data.get("departure_options", [])

        # Sum each bucket across options, weighted by the flights in that option
        hist_buckets = {}
        for option in options:
            flights_analyzed = option.get("flights_analyzed", 0)
            for bucket_key, value in option.get("delay_buckets", {}).items():
                hist_buckets[bucket_key] = hist_buckets.get(bucket_key, 0) + value * flights_analyzed

        # Normalize historical buckets by total flights
        total_hist_flights = historical_overall.get("total_flights_analyzed", 0)
        if total_hist_flights > 0:
            hist_buckets = {key: value / total_hist_flights for key, value in hist_buckets.items()}

        # Get recent buckets - prioritize arrival buckets
        if recent_stats.get("arrival", {}).get("delay_buckets"):
            print("  Using recent arrival delay buckets")
            recent_buckets = recent_stats.get("arrival", {}).get("delay_buckets", {})
        else:
            print("  Using recent departure delay buckets")
            recent_buckets = recent_stats.get("departure", {}).get("delay_buckets", {}) or {}

        # Combine over the union of bucket keys so a bucket present in only one
        # source is still reported (the absent side contributes 0)
        combined_buckets = {
            key: hist_buckets.get(key, 0) * HISTORICAL_WEIGHT + recent_buckets.get(key, 0) * RECENT_WEIGHT
            for key in {**hist_buckets, **recent_buckets}
        }

        # Build combined statistics
        combined["combined_statistics"] = {
            "overall_delay_percentage": round(weighted_delay, 1),
            "delay_buckets": {k: round(v, 1) for k, v in combined_buckets.items()},
            "recent_metrics": {
                "departure": recent_stats.get("departure", {}),
                "arrival": recent_stats.get("arrival", {})
            },
            "historical_metrics": {
                "overall_delayed_percentage": historical_delay
            }
        }

        return combined

    @staticmethod
    def calculate_reliability_score(combined_data):
        """
        Calculate an overall reliability score from 0-100.

        The score is the on-time percentage minus a penalty derived from the
        delay severity buckets. Scores based on a single data source are
        capped below 100 (80-90 depending on the branch).

        Args:
            combined_data: Combined flight statistics

        Returns:
            int: Reliability score from 0-100. Returns 0 when combined_data is
            empty and 50 when only recent data is present but it carries no
            "delay_statistics".
        """
        if not combined_data:
            return 0

        data_quality = combined_data.get("data_quality", "unknown")

        if data_quality == "missing_historical":
            # For missing historical data, use recent data and cap the score at 85
            print("  ⚠️ Reliability calculation uses only recent data (limited historical context)")

            if "delay_statistics" not in combined_data:
                print("  ⚠️ Cannot calculate reliable score - insufficient arrival data")
                return 50  # Neutral score indicating uncertainty

            # Warn when the sample is very small
            num_flights = len(combined_data.get("individual_flights", []))
            if num_flights < 3:
                print(f"  ⚠️ Very limited data sample: Only {num_flights} recent flights")

            # Use arrival statistics if available, otherwise use departure
            delay_statistics = combined_data.get("delay_statistics", {})
            stats = delay_statistics.get("arrival", {})
            if stats:
                print("  Using arrival statistics for reliability score")
            else:
                print("  Using departure statistics for reliability score (no arrival data)")
                stats = delay_statistics.get("departure", {})

            overall_delay = stats.get("delayed_percentage", 0)
            delay_buckets = stats.get("delay_buckets", {})
            raw_score = FlightDataAnalyzer._raw_score(overall_delay, delay_buckets)

            # Add explanation for very low scores
            if raw_score <= 10:
                slight_delay = delay_buckets.get("slight_delay_15_30min", 0)
                moderate_delay = delay_buckets.get("moderate_delay_30_60min", 0)
                severe_delay = delay_buckets.get("severe_delay_60min_plus", 0)
                print(f"  ⚠️ Low reliability score due to: {overall_delay}% delayed flights with severity breakdown: slight {slight_delay}%, moderate {moderate_delay}%, severe {severe_delay}%")

            return FlightDataAnalyzer._clamp_score(raw_score, 85)

        if data_quality == "missing_recent":
            # Missing recent data - use historical with lowered confidence
            print("  ⚠️ Reliability calculation uses only historical data (no recent flights)")

            overall = combined_data.get("overall", {})
            historical_data_type = overall.get("data_type", "departure")
            overall_delay = overall.get("overall_delayed_percentage", 0)

            arrival_options = combined_data.get("arrival_options")
            if historical_data_type == "arrival" and arrival_options:
                print("  Using historical ARRIVAL data for reliability calculation")
                # Buckets come from the most recent option; arrival data is capped at 90
                delay_buckets = FlightDataAnalyzer._most_recent_option(arrival_options).get("delay_buckets", {})
                raw_score = FlightDataAnalyzer._raw_score(overall_delay, delay_buckets)
                return FlightDataAnalyzer._clamp_score(raw_score, 90)

            print("  Using historical DEPARTURE data for reliability calculation")
            departure_options = combined_data.get("departure_options", [])
            if departure_options:
                # Buckets come from the most recent option; departure data is capped at 85
                delay_buckets = FlightDataAnalyzer._most_recent_option(departure_options).get("delay_buckets", {})
                raw_score = FlightDataAnalyzer._raw_score(overall_delay, delay_buckets)
                return FlightDataAnalyzer._clamp_score(raw_score, 85)

            # No detailed metrics: score on the overall percentage alone, capped at 80
            return FlightDataAnalyzer._clamp_score(100 - overall_delay, 80)

        # For complete data (or an unrecognised quality flag), calculate normally
        combined_statistics = combined_data.get("combined_statistics", {})
        overall_delay = combined_statistics.get("overall_delay_percentage", 0)
        delay_buckets = combined_statistics.get("delay_buckets", {})
        raw_score = FlightDataAnalyzer._raw_score(overall_delay, delay_buckets)

        # Ensure score is in 0-100 range
        return FlightDataAnalyzer._clamp_score(raw_score, 100)

# ===== Presentation Layer =====
class FlightDataPresenter:
    """Format flight data for display or export."""

    @staticmethod
    def _score_rating(score):
        """Map a 0-100 reliability score to its rating label."""
        if score >= 90:
            return "Excellent"
        if score >= 80:
            return "Good"
        if score >= 70:
            return "Average"
        if score >= 60:
            return "Below Average"
        return "Poor"

    @staticmethod
    def format_flight_summary(combined_data):
        """
        Format flight summary for display.

        The layout depends on the "data_quality" flag of combined_data:
        "complete" shows combined statistics, "missing_historical" shows the
        recent delay statistics, and "missing_recent" shows the historical
        overview plus the most recent historical option. Missing keys are
        rendered with defaults (0 / "Unknown") instead of raising.

        Args:
            combined_data: Combined flight statistics

        Returns:
            str: Formatted summary
        """
        if not combined_data:
            return "No data available"

        # Calculate reliability score
        reliability_score = FlightDataAnalyzer.calculate_reliability_score(combined_data)

        summary = []
        summary.append(f"Flight: {combined_data.get('flight_number', 'Unknown')} ({combined_data.get('airline', 'Unknown')})")
        summary.append(f"Route: {combined_data.get('route', 'Unknown')}")
        summary.append("")

        # Add data quality note if needed
        data_quality = combined_data.get("data_quality", "complete")
        if data_quality != "complete":
            summary.append("⚠️ LIMITED DATA QUALITY ⚠️")
            if data_quality == "missing_historical":
                summary.append("Note: Analysis based mainly on recent flight data. Limited historical context.")
            elif data_quality == "missing_recent":
                summary.append("Note: Analysis based on historical data only. No recent flight information.")
            summary.append("")

        # Add reliability score with its rating label
        score_rating = FlightDataPresenter._score_rating(reliability_score)

        # Add confidence indicator for limited data
        confidence_suffix = " (Limited confidence)" if data_quality != "complete" else ""

        summary.append(f"Reliability Score: {reliability_score}/100 ({score_rating}){confidence_suffix}")

        # Handle different data structures based on data quality
        if data_quality == "complete":
            sources = combined_data.get('data_sources', {})
            historical_source = sources.get('historical', {})
            recent_source = sources.get('recent', {})

            # Label the historical source with its data type (departure when unspecified)
            hist_data_type = historical_source.get('data_type', 'departure')
            data_type_str = "ARRIVAL" if hist_data_type == "arrival" else "DEPARTURE"

            # Standard format for complete data
            summary.append("")
            summary.append("Data Sources:")
            summary.append(f"  • Historical Data ({data_type_str}): {historical_source.get('total_flights', 0)} flights ({historical_source.get('date_range', 'Unknown')})")
            summary.append(f"  • Recent Data (ARRIVAL): {recent_source.get('total_flights', 0)} flights ({recent_source.get('date_range', 'Unknown')})")

            # Add combined statistics; buckets absent from the data are shown as 0
            combined_statistics = combined_data.get('combined_statistics', {})
            delay_buckets = combined_statistics.get('delay_buckets', {})
            summary.append("")
            summary.append("Delay Analysis:")
            summary.append(f"  • Overall Delay Rate: {combined_statistics.get('overall_delay_percentage', 0)}%")
            summary.append("  • Delay Severity:")
            summary.append(f"    - Slight Delays (15-30 min): {delay_buckets.get('slight_delay_15_30min', 0)}%")
            summary.append(f"    - Moderate Delays (30-60 min): {delay_buckets.get('moderate_delay_30_60min', 0)}%")
            summary.append(f"    - Severe Delays (60+ min): {delay_buckets.get('severe_delay_60min_plus', 0)}%")

        elif data_quality == "missing_historical":
            # Format for recent data only
            summary.append("")
            summary.append("Data Source:")
            summary.append(f"  • Recent Data: {combined_data.get('total_flights', 0)} flights ({combined_data.get('date_range', 'Unknown')})")

            # Add recent statistics
            summary.append("")
            summary.append("Delay Analysis:")

            arrival_stats = combined_data.get("delay_statistics", {}).get("arrival", {})
            departure_stats = combined_data.get("delay_statistics", {}).get("departure", {})

            if arrival_stats:
                summary.append(f"  • Arrival Delay Rate: {arrival_stats.get('delayed_percentage', 0)}%")
                summary.append("  • Arrival Delay Severity:")

                delay_buckets = arrival_stats.get("delay_buckets", {})
                summary.append(f"    - Slight Delays (15-30 min): {delay_buckets.get('slight_delay_15_30min', 0)}%")
                summary.append(f"    - Moderate Delays (30-60 min): {delay_buckets.get('moderate_delay_30_60min', 0)}%")
                summary.append(f"    - Severe Delays (60+ min): {delay_buckets.get('severe_delay_60min_plus', 0)}%")

            if departure_stats:
                summary.append(f"  • Departure Delay Rate: {departure_stats.get('delayed_percentage', 0)}%")

        elif data_quality == "missing_recent":
            overall = combined_data.get('overall', {})

            # Format for historical data only
            summary.append("")
            summary.append("Data Source:")
            summary.append(f"  • Historical Data: {overall.get('total_flights_analyzed', 0)} flights ({overall.get('overall_date_range', 'Unknown')})")

            # Add historical statistics
            summary.append("")
            summary.append("Delay Analysis:")
            summary.append(f"  • Overall Historical Delay Rate: {overall.get('overall_delayed_percentage', 0)}%")

            # Show the option list matching the historical data type, so the
            # details agree with the options the reliability score is based on
            if overall.get('data_type', 'departure') == "arrival" and combined_data.get("arrival_options"):
                options = combined_data.get("arrival_options", [])
            else:
                options = combined_data.get("departure_options", [])

            if options:
                # Most recent = latest end date (text after " to "); first wins on ties
                most_recent = max(
                    options,
                    key=lambda x: x.get("date_range", "").split(" to ")[-1]
                )

                summary.append("  • Most Recent Historical Data:")
                summary.append(f"    - Period: {most_recent.get('date_range', 'Unknown')}")
                summary.append(f"    - Flights: {most_recent.get('flights_analyzed', 0)}")
                summary.append(f"    - Delay Rate: {most_recent.get('delayed_percentage', 0)}%")

                buckets = most_recent.get("delay_buckets", {})
                summary.append(f"    - Slight Delays (15-30 min): {buckets.get('slight_delay_15_30min', 0)}%")
                summary.append(f"    - Moderate Delays (30-60 min): {buckets.get('moderate_delay_30_60min', 0)}%")
                summary.append(f"    - Severe Delays (60+ min): {buckets.get('severe_delay_60min_plus', 0)}%")

        # Add recent flights if available regardless of data quality
        if combined_data.get('individual_flights'):
            summary.append("")
            summary.append("Recent Flights:")
            # Show only the first 5 entries of the list
            for flight in combined_data['individual_flights'][:5]:
                summary.append(f"  • {flight['date']}: Dep {flight['departure']['delay_minutes']} min delay, Arr {flight['arrival']['delay_minutes']} min delay")

        return "\n".join(summary)

    @staticmethod
    def export_to_csv(combined_data, filename=None):
        """
        Export flight data to CSV.

        Builds one row per entry in combined_data['individual_flights'].

        Args:
            combined_data: Dictionary of combined flight data
            filename: Optional filename to save to

        Returns:
            pandas.DataFrame: DataFrame with flight data, or None when there
            are no individual flights to export
        """
        if not combined_data or not combined_data.get('individual_flights'):
            return None

        # Create DataFrame from individual flights
        rows = [
            {
                'Date': flight['date'],
                'Status': flight['status'],
                'Departure Airport': flight['departure']['airport'],
                'Scheduled Departure': flight['departure']['scheduled'],
                'Actual Departure': flight['departure']['actual'],
                'Departure Delay (min)': flight['departure']['delay_minutes'],
                'Arrival Airport': flight['arrival']['airport'],
                'Scheduled Arrival': flight['arrival']['scheduled'],
                'Actual Arrival': flight['arrival']['actual'],
                'Arrival Delay (min)': flight['arrival']['delay_minutes'],
                'Aircraft': flight['aircraft'],
            }
            for flight in combined_data['individual_flights']
        ]

        df = pd.DataFrame(rows)

        # Save to file if filename provided
        if filename:
            df.to_csv(filename, index=False)

        return df

# ===== Main Controller =====
class FlightAnalysisSystem:
    """Top-level controller that wires the API, processing, analysis and presentation steps together."""

    def __init__(self, api_key=None):
        """Create the system and its API client.

        Args:
            api_key: Optional API key, passed straight to FlightDataAPI
        """
        self.api = FlightDataAPI(api_key)

    def analyze_flight(self, flight_number):
        """
        Fetch, process and combine historical and recent data for one flight.

        Args:
            flight_number: Flight number to analyze

        Returns:
            dict: Result of FlightDataAnalyzer.combine_statistics for this flight
        """
        print(f"\n--- Flight: {flight_number} ---")

        # Fetch both raw data sets, announcing each one before the call
        print(f"Historical data for {flight_number}:")
        raw_historical = self.api.get_historical_delay_stats(flight_number)

        print(f"Recent data for {flight_number}:")
        raw_recent = self.api.get_recent_flights(flight_number)

        # Process historical first, then recent, and merge the two
        historical = FlightDataProcessor.process_historical_delay_stats(raw_historical)
        recent = FlightDataProcessor.process_recent_flight_data(raw_recent)
        return FlightDataAnalyzer.combine_statistics(historical, recent)

    def analyze_multiple_flights(self, flight_list):
        """
        Analyze several flights from one batch fetch.

        The raw data for all flights is fetched with a single
        batch_fetch_flights call; each entry is then processed and combined
        in turn.

        Args:
            flight_list: List of flight dictionaries with flight_number key

        Returns:
            dict: Dictionary of flight analyses keyed by flight number
        """
        fetched = self.api.batch_fetch_flights(flight_list)

        # Arguments evaluate left to right: historical is processed before recent
        return {
            number: FlightDataAnalyzer.combine_statistics(
                FlightDataProcessor.process_historical_delay_stats(raw.get('historical')),
                FlightDataProcessor.process_recent_flight_data(raw.get('recent')),
            )
            for number, raw in fetched.items()
        }

    def generate_report(self, flight_number=None, flight_data=None):
        """
        Build the formatted text report for a flight.

        Args:
            flight_number: Flight number to analyze (used only when flight_data is not given)
            flight_data: Pre-analyzed flight data (optional)

        Returns:
            str: Formatted flight report, or "No flight data available" when
            there is nothing to report on
        """
        if not flight_data:
            # Nothing supplied and nothing to look up
            if not flight_number:
                return "No flight data available"

            flight_data = self.analyze_flight(flight_number)
            if not flight_data:
                return "No flight data available"

        return FlightDataPresenter.format_flight_summary(flight_data)


In [None]:
"""Example usage of the Flight Analysis System."""
# Define target flights
target_flights = [
    {"airline": "Turkish Airlines", "flight_number": "TK714", "origin_hub": "IST", "destination_icao": "OPLA", "destination_city": "Lahore"},
    {"airline": "Emirates", "flight_number": "EK600", "origin_hub": "DXB", "destination_icao": "OPKC", "destination_city": "Karachi"}
]

# Initialize the system
system = FlightAnalysisSystem()

# Analyze a single flight first to test cache behavior
flight_number = target_flights[0]["flight_number"]
print("\n===== INITIAL ANALYSIS =====")
print(f"Analyzing flight {flight_number}...")

flight_data = system.analyze_flight(flight_number)
report = system.generate_report(flight_data=flight_data)

print("\nFlight Analysis Report:")
print("="*80)
print(report)

# Export to CSV (export_to_csv returns None when there is nothing to export)
df = FlightDataPresenter.export_to_csv(flight_data, f"{flight_number}_analysis.csv")
if df is not None:
    print(f"\nExported data to {flight_number}_analysis.csv")

# Add a new flight to demonstrate dynamic flight addition
print("\n===== ADDING A NEW FLIGHT =====")
print("Adding Qatar Airways QR610 to demonstrate cache behavior with dynamic flight additions...")
target_flights.append({
    "airline": "Qatar Airways", "flight_number": "QR610", "origin_hub": "DOH", 
    "destination_icao": "OPKC", "destination_city": "Karachi"
})

# Analyze all flights including the new one
print("\n===== MULTI-FLIGHT ANALYSIS WITH CACHE =====")
results = system.analyze_multiple_flights(target_flights)

for flight_number, data in results.items():
    print(f"\nSummary for {flight_number}:")
    print("-"*40)

    # combine_statistics returns None when neither historical nor recent data
    # was available; there is nothing to score or display in that case
    if not data:
        print("No delay statistics available")
        continue

    # Get reliability score
    reliability_score = FlightDataAnalyzer.calculate_reliability_score(data) 
    print(f"Reliability Score: {reliability_score}/100")
    
    # Check data quality and display appropriate delay percentage
    data_quality = data.get("data_quality", "complete")
    
    if data_quality == "complete":
        # Complete data - use combined statistics
        delay_pct = data.get("combined_statistics", {}).get("overall_delay_percentage", "N/A")
        print(f"Overall Delay Rate: {delay_pct}%")
        
        # Add severity breakdown for very poor scores
        if reliability_score < 40:
            delay_buckets = data.get("combined_statistics", {}).get("delay_buckets", {})
            print("Delay Severity Breakdown:")
            print(f" - Slight (15-30 min): {delay_buckets.get('slight_delay_15_30min', 'N/A')}%")
            print(f" - Moderate (30-60 min): {delay_buckets.get('moderate_delay_30_60min', 'N/A')}%")
            print(f" - Severe (60+ min): {delay_buckets.get('severe_delay_60min_plus', 'N/A')}%")
            
    elif data_quality == "missing_historical":
        # Recent data only - use arrival delay statistics if available
        arrival_stats = data.get("delay_statistics", {}).get("arrival", {})
        delay_pct = arrival_stats.get("delayed_percentage", "N/A")
        print(f"Recent Arrival Delay Rate: {delay_pct}%")
        
        # Add severity breakdown for all flights with limited data
        delay_buckets = arrival_stats.get("delay_buckets", {})
        if delay_buckets:
            print("Recent Delay Severity Breakdown:")
            print(f" - Slight (15-30 min): {delay_buckets.get('slight_delay_15_30min', 'N/A')}%")
            print(f" - Moderate (30-60 min): {delay_buckets.get('moderate_delay_30_60min', 'N/A')}%")
            print(f" - Severe (60+ min): {delay_buckets.get('severe_delay_60min_plus', 'N/A')}%")
        
        # Show sample size
        flights_count = len(data.get("individual_flights", []))
        print(f"Based on {flights_count} recent flights")
        
    elif data_quality == "missing_recent":
        # Historical data only - use overall historical percentage 
        delay_pct = data.get("overall", {}).get("overall_delayed_percentage", "N/A")
        print(f"Historical Delay Rate: {delay_pct}%")
    else:
        print("No delay statistics available")