# API Request Example to VRR
Define the name of the API endpoint you want to query and the parameters you need to pass.

In [None]:
# -*- coding: utf-8 -*-
# imports
import requests
import pandas as pd
import geopandas as gpd
import uuid
import time
import os
import logging
from datetime import datetime
from pathlib import Path

# Configure the root logger once for the whole notebook: records of level INFO
# and above are timestamped and sent to a log file as well as the console.
logging.basicConfig(
    handlers=[
        logging.FileHandler('api_requests.log'),
        logging.StreamHandler(),
    ],
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)


In [None]:
def full_api_request(datetime_dt, place_dm, name_dm, timeout=30):
    """Request the departures of one stop from the VRR EFA API.

    The raw response body of every successful request is appended to
    ``vrr_api_full_responses.txt`` for debugging.

    Args:
        datetime_dt: ``datetime`` whose date and time are sent as the
            ``itdDate*``/``itdTime*`` parameters. Its date is also used as a
            fallback for date parts missing in a departure entry.
        place_dm: Value sent as the ``place_dm`` request parameter.
        name_dm: Value sent as the ``name_dm`` request parameter.
        timeout: Seconds passed to ``requests.get`` as ``timeout`` so that a
            stalled connection cannot block the caller forever.

    Returns:
        A tuple ``(df_departures, status_code)``. The DataFrame has one row
        per entry of the response's ``departureList``; when there are no
        departures (status 204, empty body, or an empty/null list) it is
        empty but still carries all result columns.

    Raises:
        requests.exceptions.RequestException: If the request itself fails,
            the status code is neither 200 nor 204, or the body of a 200
            response is not valid JSON.
    """
    # Column order of the result; also used to build a well-formed empty frame
    result_columns = [
        'uuid', 'stop', 'platform', 'line', 'direction',
        'scheduled_departure', 'real_departure', 'scheduled_time',
        'scheduled_date_iso', 'delay_min', 'connection_exists',
        'delay_reason', 'realtime_status', 'status_text',
    ]

    # Known status codes mapped to (log function, description)
    status_messages = {
        200: (logging.info, "Request successful"),
        204: (logging.info, "No departures found"),
        400: (logging.warning, "Bad request"),
        404: (logging.error, "Not found"),
        500: (logging.error, "Internal server error"),
        503: (logging.error, "Service unavailable"),
        429: (logging.error, "Too many requests"),
    }

    def communicate_response(response):
        """Log the outcome of the request according to its HTTP status code."""
        known = status_messages.get(response.status_code)
        if known is None:
            logging.error(f"Error: {response.status_code} - {response.text}")
        else:
            log, description = known
            log(f"({response.status_code}) {description} for {place_dm} {name_dm} at {datetime_dt.isoformat()}")
        return response

    def make_uid(stop, scheduled_datetime, line):
        """Derive a deterministic UUID from stop, scheduled time and line."""
        base = f"{stop}|{scheduled_datetime}|{line}"
        return str(uuid.uuid5(uuid.NAMESPACE_DNS, base))

    def parse_datetime(parts):
        """Build a datetime from a dict of date/time parts, or return None.

        Missing date parts fall back to the request date, missing time parts
        to 0. Anything that cannot be turned into a datetime yields None.
        """
        try:
            return datetime(
                int(parts.get('year', datetime_dt.year)),
                int(parts.get('month', datetime_dt.month)),
                int(parts.get('day', datetime_dt.day)),
                int(parts.get('hour', 0)),
                int(parts.get('minute', 0)),
            )
        except (AttributeError, TypeError, ValueError, OverflowError):
            return None

    def parse_delay(raw):
        """Convert the delay field to int; empty/sentinel/invalid values give None."""
        if raw in (None, '', '-9999'):
            return None
        try:
            return int(raw)
        except (TypeError, ValueError):
            return None

    def build_results(departures):
        """Flatten the departure entries of the response into a list of dicts."""
        results = []
        for dep in departures:
            # 'servingLine' may be missing or null; treat both as "no details"
            serving_line = dep.get('servingLine') or {}
            real = dep.get('realDateTime', {})

            scheduled_dt = parse_datetime(dep.get('dateTime', {}))
            # Only build a real departure time when the API actually sent one
            real_dt = parse_datetime(real) if real else None

            stop_name = dep.get('stopName')
            line = serving_line.get('number')

            results.append({
                'uuid': make_uid(stop_name, scheduled_dt, line),
                'stop': stop_name,
                'platform': dep.get('platformName', dep.get('platform')),
                'line': line,
                'direction': serving_line.get('direction'),
                'scheduled_departure': scheduled_dt,
                'real_departure': real_dt,
                'scheduled_time': scheduled_dt.time() if scheduled_dt else None,
                'scheduled_date_iso': scheduled_dt.date().isoformat() if scheduled_dt else None,
                'delay_min': parse_delay(serving_line.get('delay')),
                'connection_exists': str(serving_line.get('cancelled')) != "1",
                'delay_reason': serving_line.get('delayReason'),
                'realtime_status': serving_line.get('realtimeStatus'),
                'status_text': serving_line.get('statusText'),
            })

        return results

    # Prepare the parameters for the API request
    params = {
        "language": "de",
        "mode": "direct",
        "outputFormat": "JSON",
        "type_dm": "stop",
        "useProxFootSearch": 0,
        "useRealtime": 1,
        "itdDateDay": datetime_dt.day,
        "itdDateMonth": datetime_dt.month,
        "itdDateYear": datetime_dt.year,
        "itdTimeHour": datetime_dt.hour,
        "itdTimeMinute": datetime_dt.minute,
        "place_dm": place_dm,
        "name_dm": name_dm,
    }

    # Text file that collects the raw API responses (debugging purposes)
    textfile = Path("vrr_api_full_responses.txt")
    textfile.touch(exist_ok=True)

    # Departure monitor endpoint of the VRR (Verkehrsverbund Rhein-Ruhr) EFA API
    API_URL = "https://efa.vrr.de/standard/XML_DM_REQUEST"

    # Make the API request; the timeout keeps a dead connection from hanging the loop
    logging.info(f"Making API request for {place_dm} {name_dm} at {datetime_dt.isoformat()}")
    response = requests.get(API_URL, params=params, timeout=timeout)
    response = communicate_response(response)

    # Anything other than 200/204 is reported to the caller as a failed request
    if response.status_code not in (200, 204):
        logging.error(f"Failed to fetch data for {place_dm} {name_dm} at {datetime_dt.isoformat()}")
        raise requests.exceptions.RequestException(
            f"Request failed with status code {response.status_code} for {place_dm} {name_dm} at {datetime_dt.isoformat()}"
        )

    # Write the raw response to the text file; a failure here is logged, not fatal
    try:
        with open(textfile, "a", encoding="utf-8") as f:
            f.write(response.text + "\n\n")
        logging.info(f"Response written to {textfile}")
    except Exception as e:
        logging.error(f"Error writing to {textfile}: {e}")

    # A 204 (or an empty body) carries no JSON to parse
    if response.status_code == 204 or not response.text.strip():
        return pd.DataFrame(columns=result_columns), response.status_code

    try:
        data = response.json()
    except ValueError as err:
        logging.error(f"Invalid JSON in response for {place_dm} {name_dm} at {datetime_dt.isoformat()}")
        raise requests.exceptions.RequestException(
            f"Invalid JSON in response for {place_dm} {name_dm} at {datetime_dt.isoformat()}"
        ) from err

    # 'departureList' may be absent or null when the stop has no departures
    departures = (data.get('departureList') if isinstance(data, dict) else None) or []
    if not departures:
        return pd.DataFrame(columns=result_columns), response.status_code

    df_departures = pd.DataFrame(build_results(departures), columns=result_columns)

    # LUT (Lookup Table) that repairs mis-decoded German characters in the
    # stop, direction and line names
    lut = {"Ã¼": "ü", "Ã¶": "ö", "Ã¤": "ä", "ÃŸ": "ß", "Ã": "ß"}
    for col in ['stop', 'direction', 'line']:
        df_departures[col] = df_departures[col].replace(lut, regex=True)

    # Convert the scheduled and real departure times to ISO format strings
    for col in ['scheduled_departure', 'real_departure']:
        df_departures[col] = pd.to_datetime(df_departures[col], errors='coerce').dt.strftime('%Y-%m-%dT%H:%M:%S')

    return df_departures, response.status_code

def update_geodata(csv_file_path, geodata_file_path, geodata_target,
                   max_rows=200, departures_per_stop=10):
    """Attach the most recent departures of each stop to the stop geodata.

    The last ``max_rows`` rows of the departures CSV are grouped by stop; for
    every stop the last ``departures_per_stop`` values of each departure
    column are collected into a list and joined onto the GeoDataFrame via the
    'stop' column. The GeoDataFrame uses short column names due to shapefile
    limitations, so columns are mapped accordingly.

    Failures (unreadable geodata, missing/empty CSV, missing columns, write
    errors) are logged and end the update without raising.

    Args:
        csv_file_path: Path of the departures CSV.
        geodata_file_path: Path of the geodata file to read.
        geodata_target: Path the updated shapefile is written to.
        max_rows: Number of trailing CSV rows to consider.
        departures_per_stop: Number of trailing departures kept per stop.

    Returns:
        None.
    """
    # Mapping from long names (df) to short names (gdf)
    col_map = {
        'stop': 'stop',
        'line': 'line',
        'direction': 'direct',
        'scheduled_departure': 'schedudep',
        'real_departure': 'realdep',
        'delay_min': 'delay'
    }
    # Short names of the columns that hold per-stop departure lists
    value_cols = [short for short in col_map.values() if short != 'stop']

    # Load the existing GeoDataFrame
    try:
        gdf = gpd.read_file(geodata_file_path)
        # Ensure columns are short-named in gdf
        gdf.rename(columns=col_map, inplace=True)
        logging.info(f"Loaded existing geodata with {len(gdf)} entries. Columns: {gdf.columns.tolist()}")
    except Exception as e:
        logging.error(f"Error loading geodata: {e}")
        return

    if 'stop' not in gdf.columns:
        logging.error("The geodata does not contain a 'stop' column.")
        return

    # Load the CSV; rows are appended over time, so the newest are at the end
    try:
        df = pd.read_csv(csv_file_path)
    except FileNotFoundError:
        logging.error(f"CSV file {csv_file_path} not found.")
        return
    except pd.errors.EmptyDataError:
        logging.error(f"CSV file {csv_file_path} is empty.")
        return

    missing = [col for col in col_map if col not in df.columns]
    if missing:
        logging.error(f"CSV file {csv_file_path} is missing columns: {missing}")
        return

    # Keep only the relevant columns of the last rows, renamed to match the gdf
    df = df[list(col_map)].tail(max_rows).rename(columns=col_map)
    logging.info(f"Loaded CSV with {len(df)} entries. Columns: {df.columns.tolist()}")

    # One row per stop, each departure column holding a list of recent values
    rows = []
    for stop, stop_df in df.groupby('stop', sort=False):
        recent = stop_df.tail(departures_per_stop)
        row = {'stop': stop}
        for col in value_cols:
            row[col] = recent[col].tolist()
        rows.append(row)
    per_stop = pd.DataFrame(rows, columns=['stop', *value_cols])

    # Drop departure columns left over from an earlier run so that the single
    # merge below cannot produce suffixed duplicates
    stale = [col for col in value_cols if col in gdf.columns]
    if stale:
        gdf = gdf.drop(columns=stale)

    # Merging once (instead of once per stop) keeps exactly one set of columns
    gdf = gdf.merge(per_stop, on='stop', how='left')

    # Save the updated GeoDataFrame to the target shapefile
    # NOTE(review): the departure columns hold Python lists; how the shapefile
    # driver stores them is not visible here - confirm the written values.
    try:
        Path(geodata_target).parent.mkdir(parents=True, exist_ok=True)
        gdf.to_file(geodata_target, driver='ESRI Shapefile')
        logging.info(f"Updated geodata saved to {geodata_target}.")
    except Exception as e:
        logging.error(f"Error saving updated geodata: {e}")
        return


# Main function to handle the API requests and manage the CSV file
def main(delay_min, placename_list):
    """Poll the departures of all given stops in an endless loop.

    The requests of one cycle are spread evenly over ``delay_min`` minutes.
    Departures whose UUID has not been seen before are appended to
    ``final_departures.csv`` and the geodata is refreshed afterwards. Errors
    of a single request are logged and the loop continues with the next stop.

    Args:
        delay_min: Duration of one full cycle over all stops, in minutes.
        placename_list: Sequence of ``(place_dm, name_dm)`` pairs to query.

    Raises:
        ValueError: If ``placename_list`` is empty.
    """
    total_requests = len(placename_list)
    if total_requests == 0:
        # Guard against a division by zero when spacing out the requests
        raise ValueError("placename_list must contain at least one (place, name) pair.")

    delay_s = delay_min * 60  # convert minutes to seconds
    request_delay = delay_s / total_requests  # space the requests out over the delay time

    # initialize csv
    csv_file = Path('final_departures.csv')
    geodata_file = Path('res/geodata/bahnhoefe.shp')
    geodata_target = Path('data/geodata/bahnhoefe_running.shp')

    # Main loop
    logging.info(f"Total requests: {total_requests}, Delay per request: {round(request_delay/60, 2)} minutes.")
    logging.info("Starting the request loop...")

    # Load existing UUIDs only once at the start
    try:
        existing_df = pd.read_csv(csv_file, usecols=['uuid'])
        existing_uuids = set(existing_df['uuid'].dropna().astype(str))
        logging.info(f"Loaded {len(existing_uuids)} existing UUIDs.")
    except (FileNotFoundError, pd.errors.EmptyDataError):
        existing_uuids = set()
        logging.info("No existing UUIDs found, starting fresh.")

    while True:
        logging.info("Starting a new cycle of requests...")

        for place_dm, name_dm in placename_list:
            try:
                df, status_code = full_api_request(datetime.now(), place_dm, name_dm)

                if df.empty:
                    logging.info(f"No departures found for {place_dm} - {name_dm}. Status code: {status_code}")
                else:
                    df['uuid'] = df['uuid'].astype(str)
                    new_df = df[~df['uuid'].isin(existing_uuids)]

                    if new_df.empty:
                        logging.info("No new UUIDs to append.")
                    else:
                        # Write the header only into a new or empty file, so an
                        # existing header is never repeated in the middle
                        write_header = not csv_file.exists() or csv_file.stat().st_size == 0
                        new_df.to_csv(csv_file, mode='a', header=write_header, index=False)
                        existing_uuids.update(new_df['uuid'])

                        # Update the geodata with the new departures
                        update_geodata(csv_file, geodata_file, geodata_target)

                        logging.info(f"Appended {len(new_df)} new departures. Status code: {status_code}")

            except requests.exceptions.RequestException as e:
                # No status code here: it is unbound (or stale) when the request failed
                logging.error(f"Request failed for {place_dm} - {name_dm}: {e}")

            except Exception as e:
                # Keep the loop alive, but record the traceback for diagnosis
                logging.exception(f"An error occurred while processing {place_dm} - {name_dm}: {e}")

            # Wait before the next request regardless of the outcome
            logging.info(f"Sleeping for {round(request_delay/60, 2)} minutes.")
            time.sleep(request_delay)

        logging.info("Next cycle...")

In [None]:
# Single-pass demo: request the departures of each configured stop once and
# collect them in one DataFrame.

# Define the datetime for the request
datetime_dt = datetime.now()

# All available stops as (place, name) pairs
placename_list = [("Duisburg", "HBF"), ("Mönchengladbach", "HBF"), ("Wuppertal", "HBF"), ("Bochum", "HBF"), ("Dortmund", "HBF"), ("Essen", "HBF"), ("Düsseldorf", "HBF")]
# Narrow the demo down to the first stop (Duisburg HBF) to keep it quick
placename_list = placename_list[:1]

# Make the API requests and gather the non-empty DataFrames
frames = []
for place_dm, name_dm in placename_list:
    print(f"Requesting data for {place_dm} - {name_dm}")
    df, status_code = full_api_request(datetime_dt, place_dm, name_dm)
    if not df.empty:
        frames.append(df)

# Concatenate once at the end; an empty DataFrame if nothing was returned
final_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

# Display the final DataFrame
final_df

SyntaxError: 'return' outside function (148001247.py, line 2)

In [None]:
# Duration of one full polling cycle over all stops, in minutes
delay_min = 0.5

# Stops to poll, as (place, name) pairs
placename_list = [
    ("Duisburg", "HBF"),
    ("Mönchengladbach", "HBF"),
    ("Wuppertal", "HBF"),
    ("Bochum", "HBF"),
    ("Dortmund", "HBF"),
    ("Essen", "HBF"),
    ("Düsseldorf", "HBF"),
]

# Start the endless polling loop
main(delay_min=delay_min, placename_list=placename_list)

2025-07-12 13:23:36,441 - INFO - Total requests: 7, Delay per request: 0.07 minutes.
2025-07-12 13:23:36,441 - INFO - Starting the request loop...
2025-07-12 13:23:36,450 - INFO - Loaded 749 existing UUIDs.
2025-07-12 13:23:36,451 - INFO - Starting a new cycle of requests...
2025-07-12 13:23:36,452 - INFO - Making API request for Duisburg HBF at 2025-07-12T13:23:36.452454
2025-07-12 13:23:36,859 - INFO - (200) Request successful for Duisburg HBF at 2025-07-12T13:23:36.452454
2025-07-12 13:23:36,860 - INFO - Response written to vrr_api_full_responses.txt
2025-07-12 13:23:36,897 - INFO - No new UUIDs to append.
2025-07-12 13:23:36,898 - INFO - Sleeping for 0.07 minutes.
2025-07-12 13:23:41,184 - INFO - Making API request for Mönchengladbach HBF at 2025-07-12T13:23:41.184715
2025-07-12 13:23:41,557 - INFO - (200) Request successful for Mönchengladbach HBF at 2025-07-12T13:23:41.184715
2025-07-12 13:23:41,559 - INFO - Response written to vrr_api_full_responses.txt
2025-07-12 13:23:41,570 -

2025-07-12 13:25:08,078 - ERROR - An error occurred while processing Essen - HBF: 'stop'
2025-07-12 13:25:12,368 - INFO - Making API request for Düsseldorf HBF at 2025-07-12T13:25:12.367266
2025-07-12 13:25:12,778 - INFO - (200) Request successful for Düsseldorf HBF at 2025-07-12T13:25:12.367266
2025-07-12 13:25:12,780 - INFO - Response written to vrr_api_full_responses.txt
2025-07-12 13:25:12,814 - INFO - Loaded existing geodata with 7 entries. Columns: ['stop', 'geometry']
2025-07-12 13:25:12,822 - INFO - Loaded CSV with 200 entries. Columns: ['stop', 'line', 'direct', 'schedudep', 'realdep', 'delay']
  gdf.to_file(geodata_file_path, driver='ESRI Shapefile')
  ogr_write(
  ogr_write(
  ogr_write(
2025-07-12 13:25:12,852 - INFO - Created 7 records
2025-07-12 13:25:12,853 - INFO - Updated geodata saved to res/geodata/bahnhoefe.shp.
2025-07-12 13:25:12,854 - INFO - Appended 6 new departures. Status code: 200
2025-07-12 13:25:12,855 - INFO - Sleeping for 0.07 minutes.
2025-07-12 13:25:17