<a href="https://colab.research.google.com/github/RemyaVKarthikeyan/AA-Stagecoach-Project/blob/main/16_Aug_2024_Coding_for_Lambda_Function.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests
import pandas as pd
import time
import logging
import signal
import sys

# Configure the root logger: INFO and above, each record prefixed with
# its timestamp and severity level.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)

def fetch_data(url, retries=3, timeout=10, backoff=2):
    """Fetch and decode JSON from a URL, retrying failed attempts.

    Args:
        url: Address to request with HTTP GET.
        retries: Maximum number of attempts before giving up.
        timeout: Seconds to wait for the server on each attempt, so a
            stalled connection cannot hang the run indefinitely.
        backoff: Base delay in seconds between attempts; the wait doubles
            after every failure (backoff, 2 * backoff, 4 * backoff, ...).

    Returns:
        The decoded JSON payload, or None when every attempt failed
        (including when ``retries`` is zero or negative).
    """
    for attempt in range(retries):
        try:
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()
            return response.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers a response body that is not valid JSON.
            logging.error(f"Error fetching data from {url}: {e}")
            if attempt >= retries - 1:
                break

            # Exponential backoff: a fixed short pause keeps hitting the
            # same rate limit (HTTP 429) on every retry.
            delay = backoff * (2 ** attempt)

            # Prefer the server's own Retry-After hint when it sends one
            # as a whole number of seconds. Compare against None explicitly:
            # a requests.Response is falsy for 4xx/5xx statuses.
            failed_response = getattr(e, 'response', None)
            if failed_response is not None:
                retry_after = failed_response.headers.get('Retry-After', '')
                if retry_after.isdecimal():
                    delay = max(delay, int(retry_after))

            time.sleep(delay)

    return None

def save_data(df_routes, df_arrivals):
    """Write the route and arrival DataFrames to CSV in the working directory.

    Args:
        df_routes: DataFrame written to ``all_bus_routes.csv``.
        df_arrivals: DataFrame written to ``live_bus_arrivals.csv``.

    Both files are written without the index column and overwrite any
    existing file of the same name.
    """
    outputs = (
        ('all_bus_routes.csv', df_routes),
        ('live_bus_arrivals.csv', df_arrivals),
    )
    for csv_name, frame in outputs:
        frame.to_csv(csv_name, index=False)
    logging.info("Data saved to CSV files.")

def signal_handler(sig, frame):
    """Persist whatever has been collected so far, then exit with status 0.

    Args:
        sig: Signal number passed in by the ``signal`` module (unused).
        frame: Interrupted stack frame passed in by the ``signal`` module
            (unused).

    Reads the module-level ``all_routes`` and ``live_arrivals`` lists.
    """
    logging.info("Interrupted! Saving data...")
    routes_frame = pd.DataFrame(all_routes)
    arrivals_frame = pd.DataFrame(live_arrivals)
    save_data(routes_frame, arrivals_frame)
    # Same effect as sys.exit(0), which raises SystemExit(0).
    raise SystemExit(0)

# Accumulators shared with signal_handler. They are created BEFORE the
# handler is registered so that an interrupt arriving immediately after
# registration cannot hit undefined names inside the handler.
all_routes = []
live_arrivals = []

# Register the signal handler for interruptions (SIGINT, e.g. Ctrl-C)
signal.signal(signal.SIGINT, signal_handler)

try:
    # Step 1: Fetch all bus lines in London
    bus_lines_url = "https://api.tfl.gov.uk/Line/Mode/bus"
    bus_lines = fetch_data(bus_lines_url)

    if bus_lines:
        bus_line_ids = [line['id'] for line in bus_lines]
        logging.info(f"Retrieved {len(bus_line_ids)} bus lines.")
    else:
        # Nothing to iterate over; the loop below is skipped and the
        # (empty) results are still written by the finally clause.
        logging.error("Failed to retrieve bus lines data.")
        bus_line_ids = []

    # Step 2: Fetch stop points and live arrivals for each bus line
    for line_id in bus_line_ids:
        for direction in ['inbound', 'outbound']:
            route_url = f"https://api.tfl.gov.uk/Line/{line_id}/Route/Sequence/{direction}"
            route_data = fetch_data(route_url)

            # Guard clause: skip this direction when the request failed or
            # the payload carries no stop point sequences.
            if not (route_data and route_data.get('stopPointSequences')):
                logging.warning(f"No stop point sequences found for line {line_id} in {direction} direction.")
                continue

            # Only the first stop point sequence of the response is read.
            stop_points = route_data['stopPointSequences'][0]['stopPoint']

            for stop in stop_points:
                # Tag each stop record with the line and direction it was
                # fetched for before storing it.
                stop['lineId'] = line_id
                stop['direction'] = direction
                all_routes.append(stop)

                # Fetch live arrival data for each stop
                stop_point_id = stop['id']
                arrivals_url = f"https://api.tfl.gov.uk/Line/{line_id}/Arrivals/{stop_point_id}"
                arrivals_data = fetch_data(arrivals_url)

                if arrivals_data:
                    for arrival in arrivals_data:
                        arrival['lineId'] = line_id
                        arrival['stopPointId'] = stop_point_id
                        live_arrivals.append(arrival)

                # Small delay to avoid overwhelming the API
                time.sleep(0.5)

        # Small delay between lines to avoid overwhelming the API
        time.sleep(1)

finally:
    # Ensure data is saved however the process ends: normal completion, an
    # unexpected exception, or the SystemExit raised by signal_handler (in
    # which case the files are written a second time with the same data).
    save_data(pd.DataFrame(all_routes), pd.DataFrame(live_arrivals))


ERROR:root:Error fetching data from https://api.tfl.gov.uk/Line/1/Arrivals/490006180S: 429 Client Error: Too Many Requests for url: https://api.tfl.gov.uk/Line/1/Arrivals/490006180S
ERROR:root:Error fetching data from https://api.tfl.gov.uk/Line/1/Arrivals/490006180S: 429 Client Error: Too Many Requests for url: https://api.tfl.gov.uk/Line/1/Arrivals/490006180S
ERROR:root:Error fetching data from https://api.tfl.gov.uk/Line/1/Arrivals/490006180S: 429 Client Error: Too Many Requests for url: https://api.tfl.gov.uk/Line/1/Arrivals/490006180S
ERROR:root:Error fetching data from https://api.tfl.gov.uk/Line/1/Arrivals/490013939S: 429 Client Error: Too Many Requests for url: https://api.tfl.gov.uk/Line/1/Arrivals/490013939S
ERROR:root:Error fetching data from https://api.tfl.gov.uk/Line/1/Arrivals/490013939S: 429 Client Error: Too Many Requests for url: https://api.tfl.gov.uk/Line/1/Arrivals/490013939S
ERROR:root:Error fetching data from https://api.tfl.gov.uk/Line/1/Arrivals/490013939S: 429

SystemExit: 0

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
