In [None]:
"""
Simple KMB API Data Extractor

Extracts real-time bus arrival data from KMB API for specified stop IDs.
"""

import requests
import json
from datetime import datetime, timedelta
import pandas as pd


def get_kmb_data(stop_id):
    """
    Fetch the raw stop-ETA payload from the KMB API for a given stop ID.

    Transport/HTTP failures and malformed response bodies are reported on
    stdout and turned into a None return value rather than raised.

    Args:
        stop_id (str): The stop ID to query

    Returns:
        dict: Decoded JSON response, or None if the request failed or the
            body could not be parsed as JSON
    """
    url = f"https://data.etabus.gov.hk/v1/transport/kmb/stop-eta/{stop_id}"

    # Step 1: the HTTP round trip (connection errors, timeouts, 4xx/5xx).
    try:
        print(f"Fetching data for stop ID: {stop_id}")
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error fetching data for stop {stop_id}: {e}")
        return None

    # Step 2: decoding. Kept separate from the transport handler because
    # requests' own JSONDecodeError is also a RequestException, which would
    # otherwise be reported as a fetch error. Every decode error that
    # response.json() raises (json, simplejson, or requests' wrapper) is a
    # ValueError, so that is the type caught here.
    try:
        data = response.json()
    except ValueError as e:
        print(f"Error parsing JSON for stop {stop_id}: {e}")
        return None

    print(f"Successfully retrieved data for stop {stop_id}")
    return data


def process_data(data, stop_id):
    """
    Flatten the raw API response into one record per route entry.

    Each entry is echoed to stdout as it is processed. An entry whose 'eta'
    is null in the response is shown as 'No ETA' in that listing; the stored
    record keeps the value exactly as received.

    Args:
        data (dict): Raw API response; its 'data' key holds the route entries
        stop_id (str): Stop ID for reference, copied into every record

    Returns:
        list: One dict per entry with the keys 'stop_id', 'route',
            'direction', 'eta', 'eta_seq' and 'data_timestamp'. Empty when
            the response is missing or contains no entries.
    """
    if not data or not data.get('data'):
        print(f"No data available for stop {stop_id}")
        return []

    routes = data['data']
    processed_routes = []

    print(f"\nProcessing {len(routes)} route entries for stop {stop_id}:")

    for route in routes:
        route_info = {
            'stop_id': stop_id,
            'route': route.get('route', 'Unknown'),
            'direction': route.get('dir', 'Unknown'),
            'eta': route.get('eta', 'No ETA'),
            'eta_seq': route.get('eta_seq', 'N/A'),
            'data_timestamp': route.get('data_timestamp', 'N/A')
        }

        processed_routes.append(route_info)

        # A present-but-null 'eta' bypasses the .get() default above, so map
        # None to the placeholder here to avoid printing a literal "None".
        eta = route_info['eta']
        eta_str = 'No ETA' if eta is None else eta
        print(f"  Route {route_info['route']} ({route_info['direction']}): {eta_str}")

    return processed_routes


def main():
    """Fetch ETA records for every configured stop, save them to a CSV, and print a summary."""

    # Station name -> stop IDs served at that station
    stations = {
        "St. Martin": [
            "3F24CFF9046300D9",
            "33ABA49F0E91A247",
            "07AB149DAD888683",
            "4A0ECA0D5AA4CB7E"
        ],
        "CHONG SAN ROAD": [
            "3A7AC3A5F9530786",
            "023E9E5A9E073E1A",
            "B34F59A0270AEDA4",
            "437CE05BCFE6248C"
        ]
    }

    print("KMB API Data Extractor")
    print("=" * 50)

    records = []
    banner_side = "=" * 20

    for station_name, stop_ids in stations.items():
        print(f"\n{banner_side} {station_name} {banner_side}")

        for stop_id in stop_ids:
            payload = get_kmb_data(stop_id)

            # Nothing usable came back for this stop; report it and move on
            if not payload:
                print(f"Failed to get data for stop {stop_id}")
                continue

            records.extend(process_data(payload, stop_id))

    # Bail out early when no stop produced any record
    if not records:
        print("No data extracted. Please check your internet connection.")
        return

    # Write the CSV under a timestamped name
    frame = pd.DataFrame(records)
    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    filename = f"kmb_data_{stamp}.csv"
    frame.to_csv(filename, index=False)

    total = len(records)
    print("\n" + "=" * 50)
    print("Data extraction complete!")
    print(f"Total records: {total}")
    print(f"Data saved to: {filename}")

    print("\nSummary:")
    print(f"Total routes found: {total}")

    # Invert the stations table so each stop ID points back at its station
    station_of_stop = {}
    for station, stops in stations.items():
        for stop_id in stops:
            station_of_stop[stop_id] = station

    # The station column is added after the CSV is written, so it only
    # feeds the per-station counts printed below
    frame['station'] = frame['stop_id'].map(station_of_stop)
    per_station = frame.groupby('station').size()

    print("\nRoutes per station:")
    for station, count in per_station.items():
        print(f"  {station}: {count} routes")


# Run the extractor only when this file is executed directly, not when it is
# imported as a module.
if __name__ == "__main__":
    main()


KMB API Data Extractor

Fetching data for stop ID: 3F24CFF9046300D9
Successfully retrieved data for stop 3F24CFF9046300D9

Processing 15 route entries for stop 3F24CFF9046300D9:
  Route 272A (O): 2025-10-19T18:49:44+08:00
  Route 272A (O): 2025-10-19T19:03:39+08:00
  Route 272A (O): 2025-10-19T19:18:03+08:00
  Route 272A (O): 2025-10-19T18:49:44+08:00
  Route 272A (O): 2025-10-19T19:03:39+08:00
  Route 272A (O): 2025-10-19T19:18:03+08:00
  Route 272A (O): 2025-10-19T18:49:44+08:00
  Route 272A (O): 2025-10-19T19:03:39+08:00
  Route 272A (O): 2025-10-19T19:18:03+08:00
  Route 274 (O): None
  Route 274P (I): 2025-10-19T19:05:31+08:00
  Route 274P (I): 2025-10-19T19:24:33+08:00
  Route 64X (O): None
  Route 65X (O): None
  Route 82D (O): None
Fetching data for stop ID: 33ABA49F0E91A247
Successfully retrieved data for stop 33ABA49F0E91A247

Processing 8 route entries for stop 33ABA49F0E91A247:
  Route 263C (I): None
  Route 272P (O): None
  Route 272P (O): None
  Route 272X (O): None
  Rou